diff --git a/software/poetry.lock b/software/poetry.lock index 4d51682..7b77af8 100644 --- a/software/poetry.lock +++ b/software/poetry.lock @@ -4090,67 +4090,69 @@ sympy = "*" [[package]] name = "open-interpreter" -version = "0.3.12" +version = "0.3.13" description = "Let language models run code" optional = false -python-versions = "<4,>=3.9" -files = [ - {file = "open_interpreter-0.3.12-py3-none-any.whl", hash = "sha256:0e155568685ea927c2b4b8139e4ce9bdf03ee0f8f5a71030f07988f72b305721"}, - {file = "open_interpreter-0.3.12.tar.gz", hash = "sha256:ab3ebde071ab19812513880be3dedcccd7c545dbea1b8a31bd054ef6332acbf6"}, -] - -[package.dependencies] -astor = ">=0.8.1,<0.9.0" -fastapi = {version = ">=0.111.0,<0.112.0", optional = true, markers = "extra == \"server\""} -git-python = ">=1.0.3,<2.0.0" -google-generativeai = ">=0.7.1,<0.8.0" -html2image = ">=2.0.4.3,<3.0.0.0" -html2text = ">=2024.2.26,<2025.0.0" -inquirer = ">=3.1.3,<4.0.0" -ipykernel = ">=6.26.0,<7.0.0" -ipywidgets = {version = ">=8.1.2,<9.0.0", optional = true, markers = "extra == \"os\""} -janus = {version = ">=1.0.0,<2.0.0", optional = true, markers = "extra == \"server\""} -jupyter-client = ">=8.6.0,<9.0.0" -litellm = ">=1.41.26,<2.0.0" -matplotlib = ">=3.8.2,<4.0.0" -nltk = ">=3.8.1,<4.0.0" -opencv-python = {version = ">=4.8.1.78,<5.0.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} -platformdirs = ">=4.2.0,<5.0.0" -plyer = {version = ">=2.1.0,<3.0.0", optional = true, markers = "extra == \"os\""} -psutil = ">=5.9.6,<6.0.0" -pyautogui = {version = ">=0.9.54,<0.10.0", optional = true, markers = "extra == \"os\""} -pydantic = ">=2.6.4,<3.0.0" -pyperclip = ">=1.9.0,<2.0.0" -pyreadline3 = {version = ">=3.4.1,<4.0.0", markers = "sys_platform == \"win32\""} -pytesseract = {version = ">=0.3.10,<0.4.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} -pywinctl = {version = ">=0.3,<0.4", optional = true, markers = "extra == \"os\""} -pyyaml = ">=6.0.1,<7.0.0" -rich = ">=13.4.2,<14.0.0" -screeninfo = {version = ">=0.8.1,<0.9.0", optional = true, markers = "extra == \"os\""} -selenium = ">=4.24.0,<5.0.0" -send2trash = ">=1.8.2,<2.0.0" -sentence-transformers = {version = ">=2.5.1,<3.0.0", optional = true, markers = "extra == \"os\""} +python-versions = ">=3.9,<4" +files = [] +develop = false + +[package.dependencies] +astor = "^0.8.1" +fastapi = {version = "^0.111.0", optional = true} +git-python = "^1.0.3" +google-generativeai = "^0.7.1" +html2image = "^2.0.4.3" +html2text = "^2024.2.26" +inquirer = "^3.1.3" +ipykernel = "^6.26.0" +ipywidgets = {version = "^8.1.2", optional = true} +janus = {version = "^1.0.0", optional = true} +jupyter-client = "^8.6.0" +litellm = "^1.41.26" +matplotlib = "^3.8.2" +opencv-python = {version = "^4.8.1.78", optional = true} +platformdirs = "^4.2.0" +plyer = {version = "^2.1.0", optional = true} +psutil = "^5.9.6" +pyautogui = {version = "^0.9.54", optional = true} +pydantic = "^2.6.4" +pyperclip = "^1.9.0" +pyreadline3 = {version = "^3.4.1", markers = "sys_platform == \"win32\""} +pytesseract = {version = "^0.3.10", optional = true} +pywinctl = {version = "^0.3", optional = true} +pyyaml = "^6.0.1" +rich = "^13.4.2" +screeninfo = {version = "^0.8.1", optional = true} +selenium = "^4.24.0" +send2trash = "^1.8.2" +sentence-transformers = {version = "^2.5.1", optional = true} setuptools = "*" -shortuuid = ">=1.0.13,<2.0.0" -six = ">=1.16.0,<2.0.0" -starlette = ">=0.37.2,<0.38.0" -tiktoken = ">=0.7.0,<0.8.0" -timm = {version = ">=0.9.16,<0.10.0", optional = true, markers = "extra == \"os\""} -tokentrim = ">=0.1.13,<0.2.0" -toml = ">=0.10.2,<0.11.0" -torch = {version = ">=2.2.1,<3.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} -typer = ">=0.12.4,<0.13.0" -uvicorn = {version = ">=0.30.1,<0.31.0", optional = true, markers = "extra == \"server\""} -webdriver-manager = ">=4.0.2,<5.0.0" -wget = ">=3.2,<4.0" -yaspin = ">=3.0.2,<4.0.0" +shortuuid = "^1.0.13" +six = "^1.16.0" +starlette = "^0.37.2" +tiktoken = "^0.7.0" +timm = {version = "^0.9.16", optional = true} +tokentrim = "^0.1.13" +toml = "^0.10.2" +typer = "^0.12.4" +uvicorn = {version = "^0.30.1", optional = true} +webdriver-manager = "^4.0.2" +wget = "^3.2" +yaspin = "^3.0.2" [package.extras] local = ["easyocr (>=1.7.1,<2.0.0)", "einops (>=0.8.0,<0.9.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "torch (>=2.2.1,<3.0.0)", "torchvision (>=0.18.0,<0.19.0)", "transformers (==4.41.2)"] -os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)", "torch (>=2.2.1,<3.0.0)"] +os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)"] safe = ["semgrep (>=1.52.0,<2.0.0)"] server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=0.30.1,<0.31.0)"] +[package.source] +type = "git" +url = "https://github.com/openinterpreter/open-interpreter.git" +reference = "development" +resolved_reference = "bd24acd89d3caf113a14109106952de2c793432f" + [[package]] name = "openai" version = "1.36.1" @@ -11078,4 +11080,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "5532f1b0732f4c73e30db78b717afb0fd5cf327f1bf093f073cec113428cc4b9" +content-hash = "7c40eca9d5b84b65894ddef258e1d7237d1698a62db1a68ee42871fe20d18f6f" diff --git a/software/pyproject.toml b/software/pyproject.toml index 63d6c59..6b75c87 100644 --- a/software/pyproject.toml +++ b/software/pyproject.toml @@ -19,7 +19,7 @@ livekit-plugins-openai = "^0.8.1" livekit-plugins-silero = "^0.6.4" livekit-plugins-elevenlabs = "^0.7.3" segno = "^1.6.1" -open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package +open-interpreter = {git = "https://github.com/openinterpreter/open-interpreter.git", rev = "development", extras = ["os", "server"]} ngrok = "^1.4.0" realtimetts = {extras = ["all"], version = "^0.4.5"} realtimestt = "^0.2.41" diff --git a/software/source/server/livekit/worker.py b/software/source/server/livekit/worker.py index 5b76399..67de8b0 100644 --- a/software/source/server/livekit/worker.py +++ b/software/source/server/livekit/worker.py @@ -1,21 +1,130 @@ import asyncio import copy import os +import re from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli -from livekit.agents.llm import ChatContext, ChatMessage +from livekit.agents.transcription import STTSegmentsForwarder +from livekit.agents.llm import ChatContext, ChatMessage, LLMStream, ChatChunk, ChoiceDelta, Choice from livekit import rtc from livekit.agents.voice_assistant import VoiceAssistant from livekit.plugins import deepgram, openai, silero, elevenlabs from dotenv import load_dotenv import sys import numpy as np - +from typing import AsyncIterator load_dotenv() start_message = """Hi! You can hold the white circle below to speak to me. Try asking what I can do.""" +class ProcessedLLMStream(LLMStream): + def __init__( + self, + original_stream: LLMStream, + regex_pattern: str = r'', + ) -> None: + super().__init__(chat_ctx=original_stream.chat_ctx, fnc_ctx=original_stream.fnc_ctx) + self.original_stream = original_stream + self.regex_pattern = regex_pattern + self.init_match = '<.*?' # match for the '<' and any characters to the left of it + self.accumulating = False + self._aiter = self._process_stream() + self._buffer = "" + + + async def _process_stream(self) -> AsyncIterator[ChatChunk]: + async for chunk in self.original_stream: + new_choices = [] + for choice in chunk.choices: + content = choice.delta.content + + if content: + init_match = re.search(self.init_match, content) + if init_match: + print("INITIAL MATCH FOUND!!!!!!") + print("INITIAL MATCH FOUND!!!!!!") + print("INITIAL MATCH FOUND!!!!!!") + print("INITIAL MATCH FOUND!!!!!!") + print("INITIAL MATCH FOUND!!!!!!") + print("INITIAL MATCH FOUND!!!!!!") + print("INITIAL MATCH FOUND!!!!!!") + self.accumulating = True + if self.accumulating: + self._buffer += content + print("ACCUMULATING BUFFER!!!") + print("ACCUMULATING BUFFER!!!") + print("ACCUMULATING BUFFER!!!") + print("ACCUMULATING BUFFER!!!") + print("ACCUMULATING BUFFER!!!") + print("ACCUMULATING BUFFER!!!") + match = re.search(self.regex_pattern, self._buffer) + if match: + code = match.group(1) + print(f"Extracted Code: {code}") + + # Create a confirmation message + confirmation_msg = ChatMessage( + role="assistant", + content=f"Code extracted: {code}", + ) + + # Wrap the confirmation message in ChoiceDelta and Choice + choice_delta = ChoiceDelta( + role=confirmation_msg.role, + content=str(confirmation_msg.content) # we know confirmation_msg.content is a string + ) + new_choice = Choice( + delta=choice_delta, + index=choice.index + ) + + # Create a new ChatChunk with the confirmation Choice + confirmation_chunk = ChatChunk(choices=[new_choice]) + + # Yield the confirmation chunk + yield confirmation_chunk + self.accumulating = False + self._buffer = "" + continue # Skip yielding the original content + new_choices.append(choice) + if new_choices: + yield ChatChunk(choices=new_choices) + + async def __anext__(self) -> ChatChunk: + try: + return await self._aiter.__anext__() + except StopAsyncIteration: + await self.aclose() + raise + +def _01_synthesize_assistant_reply( + assistant: VoiceAssistant, chat_ctx: ChatContext +) -> LLMStream: + """ + Custom function to process the OpenAI compatible endpoint's output. + Extracts code from responses matching the pattern. + + Args: + assistant (VoiceAssistant): The VoiceAssistant instance. + chat_ctx (ChatContext): The current chat context. + + Returns: + LLMStream: The processed LLMStream. + """ + llm_stream = assistant.llm.chat(chat_ctx=chat_ctx, fnc_ctx=assistant.fnc_ctx) + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") + + return ProcessedLLMStream(original_stream=llm_stream) + # This function is the entrypoint for the agent. async def entrypoint(ctx: JobContext): # Create an initial chat context with a system prompt @@ -96,6 +205,7 @@ async def entrypoint(ctx: JobContext): llm=open_interpreter, # Language Model tts=tts, # Text-to-Speech chat_ctx=initial_ctx, # Chat history context + # will_synthesize_assistant_reply=_01_synthesize_assistant_reply, ) chat = rtc.ChatManager(ctx.room) @@ -118,13 +228,26 @@ async def entrypoint(ctx: JobContext): await asyncio.sleep(1) + print("HELLO FROM INSIDE THE WORKER") + print("HELLO FROM INSIDE THE WORKER") + print("HELLO FROM INSIDE THE WORKER") + print("HELLO FROM INSIDE THE WORKER") + print("HELLO FROM INSIDE THE WORKER") + # Greets the user with an initial message await assistant.say(start_message, allow_interruptions=True) + stt_forwarder = STTSegmentsForwarder(room=ctx.room, participant=ctx.room.local_participant) + await stt_forwarder._run() + def main(livekit_url): - + print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") + print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") + print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") + print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") + print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") # Workers have to be run as CLIs right now. # So we need to simualte running "[this file] dev" @@ -134,5 +257,5 @@ def main(livekit_url): # Initialize the worker with the entrypoint cli.run_app( - WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url) + WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url, port=8082) ) \ No newline at end of file