convert captured images to base64 and send them as messages whenever an audio message is sent

pull/34/head
Tom Chapin 12 months ago
parent ed4db2aa45
commit b83c7fb261

@ -23,6 +23,7 @@ import wave
import tempfile import tempfile
from datetime import datetime from datetime import datetime
import cv2 import cv2
import base64
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run? from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
from ..server.utils.kernel import put_kernel_messages_into_queue from ..server.utils.kernel import put_kernel_messages_into_queue
from ..server.utils.get_system_info import get_system_info from ..server.utils.get_system_info import get_system_info
@ -76,7 +77,7 @@ class Device:
if ret: if ret:
temp_dir = tempfile.gettempdir() temp_dir = tempfile.gettempdir()
image_path = os.path.join(temp_dir, f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.jpg") image_path = os.path.join(temp_dir, f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png")
self.captured_images.append(image_path) self.captured_images.append(image_path)
cv2.imwrite(image_path, frame) cv2.imwrite(image_path, frame)
logger.info(f"Camera image captured to {image_path}") logger.info(f"Camera image captured to {image_path}")
@ -89,6 +90,31 @@ class Device:
return image_path return image_path
def encode_image_to_base64(self, image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in a single call; the
    resulting base64 bytes are decoded as UTF-8 so the caller gets a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def add_image_to_send_queue(self, image_path):
    """Queue one image file as a base64 LMC message, then delete the file.

    The file at ``image_path`` is encoded with ``encode_image_to_base64``,
    wrapped in a user/image message dict and put on ``send_queue``. The file
    is removed from disk only after the message has been queued.
    """
    payload = {
        "role": "user",
        "type": "image",
        "format": "base64.png",
        "content": self.encode_image_to_base64(image_path),
    }
    send_queue.put(payload)
    # The on-disk copy is no longer needed once its contents are queued.
    os.remove(image_path)
def queue_all_captured_images(self):
    """Queue every captured image for sending, oldest first.

    Each path is removed from ``self.captured_images`` *before* it is handed
    to ``add_image_to_send_queue``. That keeps the list consistent if a send
    fails part-way: paths already processed (whose files have been deleted)
    and the failing path itself are gone from the list, while the paths not
    yet attempted remain for the next call. Clearing only after the whole
    loop would leave stale paths behind on failure and make every later call
    fail on the first of them.

    Raises:
        Whatever ``add_image_to_send_queue`` raises (e.g. ``OSError`` when an
        image file is missing); the exception is propagated unchanged.
    """
    pending = self.captured_images
    while pending:
        # Pop first so a failure never leaves an already-handled path queued.
        self.add_image_to_send_queue(pending.pop(0))
def record_audio(self): def record_audio(self):
if os.getenv('STT_RUNNER') == "server": if os.getenv('STT_RUNNER') == "server":
@ -132,6 +158,8 @@ class Device:
send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": ""}) send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": ""})
send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True}) send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
else: else:
self.queue_all_captured_images()
if os.getenv('STT_RUNNER') == "client": if os.getenv('STT_RUNNER') == "client":
# Run stt then send text # Run stt then send text
text = stt_wav(wav_path) text = stt_wav(wav_path)

Loading…
Cancel
Save