diff --git a/.gitignore b/.gitignore
index ff21f10..cd3f4ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,3 +167,4 @@ cython_debug/
 
 # ignore the aifs index files
 _.aifs
+01OS/output_audio.wav
diff --git a/01OS/.env.example b/01OS/.env.example
index f05028c..388ae53 100644
--- a/01OS/.env.example
+++ b/01OS/.env.example
@@ -35,6 +35,17 @@ STT_RUNNER=client # If server, audio will be sent over websocket.
 # Will expose the server publically and display that URL.
 SERVER_EXPOSE_PUBLICALLY=False
 
+# Image capture settings
+CAMERA_ENABLED=True
+
+# Camera device selection (Typically 0 for built-in, 1 for USB)
+CAMERA_DEVICE_INDEX=0
+
+# Camera warmup time
+# This is a workaround for some cameras that don't immediately
+# return a properly exposed picture when they are first turned on
+CAMERA_WARMUP_SECONDS=0.4
+
 # Debug level
 # LOG_LEVEL=DEBUG
 LOG_LEVEL="INFO"
\ No newline at end of file
diff --git a/01OS/01OS/clients/base_device.py b/01OS/01OS/clients/base_device.py
index d7dd30f..60f3cf3 100644
--- a/01OS/01OS/clients/base_device.py
+++ b/01OS/01OS/clients/base_device.py
@@ -1,6 +1,7 @@
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.
 
+import os
 import asyncio
 import threading
 import os
@@ -21,6 +22,8 @@ import time
 import wave
 import tempfile
 from datetime import datetime
+import cv2
+import base64
 from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
 # In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
 from ..server.utils.kernel import put_kernel_messages_into_queue
@@ -44,6 +47,11 @@ RATE = 44100 # Sample rate
 RECORDING = False # Flag to control recording state
 SPACEBAR_PRESSED = False # Flag to track spacebar press state
 
+# Camera configuration
+CAMERA_ENABLED = os.getenv('CAMERA_ENABLED', 'False').strip().lower() in ('1', 'true', 'yes', 'on') # env values are strings: bool("False") would be True
+CAMERA_DEVICE_INDEX = int(os.getenv('CAMERA_DEVICE_INDEX', 0))
+CAMERA_WARMUP_SECONDS = float(os.getenv('CAMERA_WARMUP_SECONDS', 0))
+
 # Specify OS
 current_platform = get_system_info()
 
@@ -54,9 +62,70 @@ send_queue = queue.Queue()
 
 class Device:
     def __init__(self):
+        self.pressed_keys = set()
+        self.captured_images = []
         self.audiosegments = []
-        pass
 
+    def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
+        """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list.
+
+        Returns the path of the saved image, or None if no image could be captured or written."""
+        cap = cv2.VideoCapture(camera_index)
+        try:
+            ret, frame = cap.read() # Capture a single frame to initialize the camera
+
+            if CAMERA_WARMUP_SECONDS > 0:
+                # Allow camera to warm up, then snap a picture again
+                # This is a workaround for some cameras that don't return a properly exposed
+                # picture immediately when they are first turned on
+                time.sleep(CAMERA_WARMUP_SECONDS)
+                ret, frame = cap.read()
+        finally:
+            # Always give the device back, even if grabbing a frame raised
+            cap.release()
+
+        if not ret:
+            logger.error(f"Error: Couldn't capture an image from camera ({camera_index})")
+            return None
+
+        temp_dir = tempfile.gettempdir()
+        image_path = os.path.join(temp_dir, f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png")
+        # Only track the image once it is really on disk, otherwise the send step would fail to open it
+        if not cv2.imwrite(image_path, frame):
+            logger.error(f"Error: Couldn't write the camera image to {image_path}")
+            return None
+
+        self.captured_images.append(image_path)
+        logger.info(f"Camera image captured to {image_path}")
+        logger.info(f"You now have {len(self.captured_images)} images which will be sent along with your next audio message.")
+        return image_path
+
+
+    def encode_image_to_base64(self, image_path):
+        """Encodes an image file to a base64 string."""
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+
+    def add_image_to_send_queue(self, image_path):
+        """Encodes an image and adds an LMC message to the send queue with the image data."""
+        base64_image = self.encode_image_to_base64(image_path)
+        image_message = {
+            "role": "user",
+            "type": "image",
+            "format": "base64.png",
+            "content": base64_image
+        }
+        send_queue.put(image_message)
+        # Delete the image file from the file system after sending it
+        os.remove(image_path)
+
+    def queue_all_captured_images(self):
+        """Queues all captured images to be sent."""
+        for image_path in self.captured_images:
+            self.add_image_to_send_queue(image_path)
+        self.captured_images.clear() # Clear the list after sending
+
+
     async def play_audiosegments(self):
         """Plays them sequentially."""
         while True:
@@ -112,6 +181,8 @@ class Device:
                 send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "content": ""})
                 send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})
         else:
+            self.queue_all_captured_images()
+
             if os.getenv('STT_RUNNER') == "client":
                 # Run stt then send text
                 text = stt_wav(wav_path)
@@ -142,18 +213,28 @@ class Device:
             RECORDING = False
 
     def on_press(self, key):
-        """Detect spacebar press."""
-        if key == keyboard.Key.space:
+        """Detect spacebar press, ESC key press, and Ctrl+C combination."""
+        self.pressed_keys.add(key) # Add the pressed key to the set
+
+        if keyboard.Key.esc in self.pressed_keys:
+            logger.info("Exiting...")
+            os._exit(0)
+        elif keyboard.Key.space in self.pressed_keys:
             self.toggle_recording(True)
+        elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys:
+            logger.info("Ctrl+C pressed. Exiting...")
+            os._exit(0)
 
     def on_release(self, key):
-        """Detect spacebar release and ESC key press."""
+        """Detect spacebar release and 'c' key press for camera, and handle key release."""
+        self.pressed_keys.discard(key) # Remove the released key from the key press tracking set
+
         if key == keyboard.Key.space:
             self.toggle_recording(False)
-        elif key == keyboard.Key.esc or (key == keyboard.Key.ctrl and keyboard.Key.c):
-            logger.info("Exiting...")
-            os._exit(0)
+        elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'):
+            self.fetch_image_from_camera()
 
+
     async def message_sender(self, websocket):
         while True:
             message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
@@ -168,7 +249,11 @@ class Device:
         while True:
             try:
                 async with websockets.connect(WS_URL) as websocket:
-                    logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
+                    if CAMERA_ENABLED:
+                        logger.info("Press the spacebar to start/stop recording. Press 'c' to capture an image from the camera. Press ESC to exit.")
+                    else:
+                        logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
+
                     asyncio.create_task(self.message_sender(websocket))
 
                     while True:
diff --git a/01OS/01OS/clients/start.sh b/01OS/01OS/clients/start.sh
index 8e8edc9..844e6a0 100644
--- a/01OS/01OS/clients/start.sh
+++ b/01OS/01OS/clients/start.sh
@@ -1,8 +1,8 @@
 DEVICE=$(uname -n)
 if [[ "$DEVICE" == "rpi" ]]; then
     cd 01OS
-    python -m 01OS.clients.rpi.device &
+    python -m 01OS.clients.rpi.device
 else
     cd 01OS
-    python -m 01OS.clients.macos.device &
+    python -m 01OS.clients.macos.device
 fi
diff --git a/01OS/poetry.lock b/01OS/poetry.lock
index 57c2429..66ee052 100644
--- a/01OS/poetry.lock
+++ b/01OS/poetry.lock
@@ -1890,6 +1890,31 @@ typing-extensions = ">=4.7,<5"
 [package.extras]
 datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
 
+[[package]]
+name = "opencv-python"
+version = "4.9.0.80"
+description = "Wrapper package for OpenCV python bindings."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"},
+    {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"},
+    {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"},
+    {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"},
+    {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"},
+    {file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"},
+    {file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"},
+]
+
+[package.dependencies]
+numpy = [
+    {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
+    {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
+    {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
+    {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
+]
+
 [[package]]
 name = "packaging"
 version = "23.2"
@@ -3514,4 +3539,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.12"
-content-hash = "12ccff8a2521e7eb88eee82cfd3de409fea8e1658406d6148a42f9347ca7b2a7"
+content-hash = "5c8d587b405e97c0dca454078950157106f9aea687cbecce5b7ae7effd2aeece"
diff --git a/01OS/pyproject.toml b/01OS/pyproject.toml
index c36dcc3..42f5992 100644
--- a/01OS/pyproject.toml
+++ b/01OS/pyproject.toml
@@ -25,6 +25,7 @@ pydub = "^0.25.1"
 ngrok = "^1.0.0"
 open-interpreter = "^0.2.0"
 simpleaudio = "^1.0.4"
+opencv-python = "^4.9.0.80"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/01OS/start.sh b/01OS/start.sh
index 201f475..36b7ec6 100755
--- a/01OS/start.sh
+++ b/01OS/start.sh
@@ -1,5 +1,12 @@
 #!/usr/bin/env bash
 
+# Set python to prioritize the module files from the current directory
+# If we don't do this, then the python interpreter will not be able to find the modules,
+# and will throw an error like "ModuleNotFoundError: No module named '01OS'".
+# If we solve the problem by pip installing the official 01OS package, then those
+# modules will run instead of the local ones that we are trying to develop with.
+export PYTHONPATH="$(pwd):$PYTHONPATH"
+
 ### Import Environment Variables from .env
 SCRIPT_DIR="$(dirname "$0")"
 if [ ! -f "$SCRIPT_DIR/.env" ]; then