Merge pull request #34 from tomchapin/feature/camera-snapshots

Feature/camera snapshots (WIP)
pull/52/head
killian 1 year ago committed by GitHub
commit f2e51dd14f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

1
.gitignore vendored

@ -167,3 +167,4 @@ cython_debug/
# ignore the aifs index files
_.aifs
01OS/output_audio.wav

@ -35,6 +35,17 @@ STT_RUNNER=client # If server, audio will be sent over websocket.
# Will expose the server publicly and display that URL.
SERVER_EXPOSE_PUBLICALLY=False
# Image capture settings
CAMERA_ENABLED=True
# Camera device selection (Typically 0 for built-in, 1 for USB)
CAMERA_DEVICE_INDEX=0
# Camera warmup time
# This is a workaround for some cameras that don't immediately
# return a properly exposed picture when they are first turned on
CAMERA_WARMUP_SECONDS=0.4
# Debug level
# LOG_LEVEL=DEBUG
LOG_LEVEL="INFO"

@ -1,6 +1,7 @@
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
import os
import asyncio
import threading
import os
@ -21,6 +22,8 @@ import time
import wave
import tempfile
from datetime import datetime
import cv2
import base64
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
from ..server.utils.kernel import put_kernel_messages_into_queue
@ -44,6 +47,11 @@ RATE = 44100 # Sample rate
RECORDING = False # Flag to control recording state
SPACEBAR_PRESSED = False # Flag to track spacebar press state
# Camera configuration
# NOTE: bool() of any non-empty string is True, so values such as "False" or "0"
# coming from the environment must be parsed explicitly rather than cast.
CAMERA_ENABLED = str(os.getenv('CAMERA_ENABLED', False)).strip().lower() in ('1', 'true', 'yes', 'on')
# Device index handed to cv2.VideoCapture by default.
CAMERA_DEVICE_INDEX = int(os.getenv('CAMERA_DEVICE_INDEX', 0))
# Seconds to wait between the first (discarded) frame and the frame that is kept.
CAMERA_WARMUP_SECONDS = float(os.getenv('CAMERA_WARMUP_SECONDS', 0))
# Specify OS
current_platform = get_system_info()
@ -54,8 +62,63 @@ send_queue = queue.Queue()
class Device:
def __init__(self):
self.pressed_keys = set()
self.captured_images = []
self.audiosegments = []
pass
def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
    """Capture one frame from a camera and save it as a PNG in the temp directory.

    On success the file path is appended to ``self.captured_images``.

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture``.

    Returns:
        The path of the written PNG file, or ``None`` when the camera could
        not be opened, no frame could be read, or the file could not be written.
    """
    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            # Skip the warmup sleep entirely when there is no usable device.
            logger.error(f"Error: Couldn't capture an image from camera ({camera_index})")
            return None

        ret, frame = cap.read()  # Capture a single frame to initialize the camera

        if CAMERA_WARMUP_SECONDS > 0:
            # Allow camera to warm up, then snap a picture again.
            # This is a workaround for some cameras that don't return a properly
            # exposed picture immediately when they are first turned on.
            time.sleep(CAMERA_WARMUP_SECONDS)
            ret, frame = cap.read()
    finally:
        # Always hand the device back, even if reading raised.
        cap.release()

    if not ret:
        logger.error(f"Error: Couldn't capture an image from camera ({camera_index})")
        return None

    temp_dir = tempfile.gettempdir()
    image_path = os.path.join(temp_dir, f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png")

    # Only record the path once the file really exists; a dangling path would
    # make the later encode/send step fail when it tries to open the file.
    if not cv2.imwrite(image_path, frame):
        logger.error(f"Error: Couldn't write camera image to {image_path}")
        return None

    self.captured_images.append(image_path)
    logger.info(f"Camera image captured to {image_path}")
    logger.info(f"You now have {len(self.captured_images)} images which will be sent along with your next audio message.")
    return image_path
def encode_image_to_base64(self, image_path):
    """Read the file at *image_path* and return its bytes as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def add_image_to_send_queue(self, image_path):
    """Put the image at *image_path* on the send queue as a base64 LMC message, then delete the file."""
    encoded = self.encode_image_to_base64(image_path)
    send_queue.put({
        "role": "user",
        "type": "image",
        "format": "base64.png",
        "content": encoded,
    })
    # The payload is now held in the queue, so the file on disk is removed.
    os.remove(image_path)
def queue_all_captured_images(self):
    """Push every pending captured image onto the send queue, then empty the pending list."""
    pending = self.captured_images
    for path in pending:
        self.add_image_to_send_queue(path)
    # Reached only when every image was queued without raising.
    pending.clear()
async def play_audiosegments(self):
"""Plays them sequentially."""
@ -112,6 +175,8 @@ class Device:
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "content": ""})
send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})
else:
self.queue_all_captured_images()
if os.getenv('STT_RUNNER') == "client":
# Run stt then send text
text = stt_wav(wav_path)
@ -142,17 +207,27 @@ class Device:
RECORDING = False
def on_press(self, key):
"""Detect spacebar press."""
if key == keyboard.Key.space:
"""Detect spacebar press, ESC key press, and Ctrl+C combination."""
self.pressed_keys.add(key) # Add the pressed key to the set
if keyboard.Key.esc in self.pressed_keys:
logger.info("Exiting...")
os._exit(0)
elif keyboard.Key.space in self.pressed_keys:
self.toggle_recording(True)
elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys:
logger.info("Ctrl+C pressed. Exiting...")
os._exit(0)
def on_release(self, key):
"""Detect spacebar release and ESC key press."""
"""Detect spacebar release and 'c' key press for camera, and handle key release."""
self.pressed_keys.discard(key) # Remove the released key from the key press tracking set
if key == keyboard.Key.space:
self.toggle_recording(False)
elif key == keyboard.Key.esc or (key == keyboard.Key.ctrl and keyboard.Key.c):
logger.info("Exiting...")
os._exit(0)
elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'):
self.fetch_image_from_camera()
async def message_sender(self, websocket):
while True:
@ -168,7 +243,11 @@ class Device:
while True:
try:
async with websockets.connect(WS_URL) as websocket:
logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
if CAMERA_ENABLED:
logger.info("Press the spacebar to start/stop recording. Press 'c' to capture an image from the camera. Press ESC to exit.")
else:
logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
asyncio.create_task(self.message_sender(websocket))
while True:

@ -1,8 +1,8 @@
DEVICE=$(uname -n)
if [[ "$DEVICE" == "rpi" ]]; then
cd 01OS
python -m 01OS.clients.rpi.device &
python -m 01OS.clients.rpi.device
else
cd 01OS
python -m 01OS.clients.macos.device &
python -m 01OS.clients.macos.device
fi

27
01OS/poetry.lock generated

@ -1890,6 +1890,31 @@ typing-extensions = ">=4.7,<5"
[package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
[[package]]
name = "opencv-python"
version = "4.9.0.80"
description = "Wrapper package for OpenCV python bindings."
optional = false
python-versions = ">=3.6"
files = [
{file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"},
{file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"},
{file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"},
{file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"},
{file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"},
{file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"},
{file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"},
]
[package.dependencies]
numpy = [
{version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
]
[[package]]
name = "packaging"
version = "23.2"
@ -3514,4 +3539,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<3.12"
content-hash = "12ccff8a2521e7eb88eee82cfd3de409fea8e1658406d6148a42f9347ca7b2a7"
content-hash = "5c8d587b405e97c0dca454078950157106f9aea687cbecce5b7ae7effd2aeece"

@ -25,6 +25,7 @@ pydub = "^0.25.1"
ngrok = "^1.0.0"
open-interpreter = "^0.2.0"
simpleaudio = "^1.0.4"
opencv-python = "^4.9.0.80"
[build-system]
requires = ["poetry-core"]

@ -1,5 +1,12 @@
#!/usr/bin/env bash
# Set python to prioritize the module files from the current directory
# If we don't do this, then the python interpreter will not be able to find the modules,
# and will throw an error like "ModuleNotFoundError: No module named '01OS'".
# If we solve the problem by pip installing the official 01OS package, then those
# modules will run instead of the local ones that we are trying to develop with.
export PYTHONPATH="$(pwd):$PYTHONPATH"
### Import Environment Variables from .env
SCRIPT_DIR="$(dirname "$0")"
if [ ! -f "$SCRIPT_DIR/.env" ]; then

Loading…
Cancel
Save