diff --git a/software/poetry.lock b/software/poetry.lock index 7b77af8..4d51682 100644 --- a/software/poetry.lock +++ b/software/poetry.lock @@ -4090,69 +4090,67 @@ sympy = "*" [[package]] name = "open-interpreter" -version = "0.3.13" +version = "0.3.12" description = "Let language models run code" optional = false -python-versions = ">=3.9,<4" -files = [] -develop = false - -[package.dependencies] -astor = "^0.8.1" -fastapi = {version = "^0.111.0", optional = true} -git-python = "^1.0.3" -google-generativeai = "^0.7.1" -html2image = "^2.0.4.3" -html2text = "^2024.2.26" -inquirer = "^3.1.3" -ipykernel = "^6.26.0" -ipywidgets = {version = "^8.1.2", optional = true} -janus = {version = "^1.0.0", optional = true} -jupyter-client = "^8.6.0" -litellm = "^1.41.26" -matplotlib = "^3.8.2" -opencv-python = {version = "^4.8.1.78", optional = true} -platformdirs = "^4.2.0" -plyer = {version = "^2.1.0", optional = true} -psutil = "^5.9.6" -pyautogui = {version = "^0.9.54", optional = true} -pydantic = "^2.6.4" -pyperclip = "^1.9.0" -pyreadline3 = {version = "^3.4.1", markers = "sys_platform == \"win32\""} -pytesseract = {version = "^0.3.10", optional = true} -pywinctl = {version = "^0.3", optional = true} -pyyaml = "^6.0.1" -rich = "^13.4.2" -screeninfo = {version = "^0.8.1", optional = true} -selenium = "^4.24.0" -send2trash = "^1.8.2" -sentence-transformers = {version = "^2.5.1", optional = true} +python-versions = "<4,>=3.9" +files = [ + {file = "open_interpreter-0.3.12-py3-none-any.whl", hash = "sha256:0e155568685ea927c2b4b8139e4ce9bdf03ee0f8f5a71030f07988f72b305721"}, + {file = "open_interpreter-0.3.12.tar.gz", hash = "sha256:ab3ebde071ab19812513880be3dedcccd7c545dbea1b8a31bd054ef6332acbf6"}, +] + +[package.dependencies] +astor = ">=0.8.1,<0.9.0" +fastapi = {version = ">=0.111.0,<0.112.0", optional = true, markers = "extra == \"server\""} +git-python = ">=1.0.3,<2.0.0" +google-generativeai = ">=0.7.1,<0.8.0" +html2image = ">=2.0.4.3,<3.0.0.0" +html2text = ">=2024.2.26,<2025.0.0" +inquirer = ">=3.1.3,<4.0.0" +ipykernel = ">=6.26.0,<7.0.0" +ipywidgets = {version = ">=8.1.2,<9.0.0", optional = true, markers = "extra == \"os\""} +janus = {version = ">=1.0.0,<2.0.0", optional = true, markers = "extra == \"server\""} +jupyter-client = ">=8.6.0,<9.0.0" +litellm = ">=1.41.26,<2.0.0" +matplotlib = ">=3.8.2,<4.0.0" +nltk = ">=3.8.1,<4.0.0" +opencv-python = {version = ">=4.8.1.78,<5.0.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} +platformdirs = ">=4.2.0,<5.0.0" +plyer = {version = ">=2.1.0,<3.0.0", optional = true, markers = "extra == \"os\""} +psutil = ">=5.9.6,<6.0.0" +pyautogui = {version = ">=0.9.54,<0.10.0", optional = true, markers = "extra == \"os\""} +pydantic = ">=2.6.4,<3.0.0" +pyperclip = ">=1.9.0,<2.0.0" +pyreadline3 = {version = ">=3.4.1,<4.0.0", markers = "sys_platform == \"win32\""} +pytesseract = {version = ">=0.3.10,<0.4.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} +pywinctl = {version = ">=0.3,<0.4", optional = true, markers = "extra == \"os\""} +pyyaml = ">=6.0.1,<7.0.0" +rich = ">=13.4.2,<14.0.0" +screeninfo = {version = ">=0.8.1,<0.9.0", optional = true, markers = "extra == \"os\""} +selenium = ">=4.24.0,<5.0.0" +send2trash = ">=1.8.2,<2.0.0" +sentence-transformers = {version = ">=2.5.1,<3.0.0", optional = true, markers = "extra == \"os\""} setuptools = "*" -shortuuid = "^1.0.13" -six = "^1.16.0" -starlette = "^0.37.2" -tiktoken = "^0.7.0" -timm = {version = "^0.9.16", optional = true} -tokentrim = "^0.1.13" -toml = "^0.10.2" -typer = "^0.12.4" -uvicorn = {version = "^0.30.1", optional = true} -webdriver-manager = "^4.0.2" -wget = "^3.2" -yaspin = "^3.0.2" +shortuuid = ">=1.0.13,<2.0.0" +six = ">=1.16.0,<2.0.0" +starlette = ">=0.37.2,<0.38.0" +tiktoken = ">=0.7.0,<0.8.0" +timm = {version = ">=0.9.16,<0.10.0", optional = true, markers = "extra == \"os\""} +tokentrim = ">=0.1.13,<0.2.0" +toml = ">=0.10.2,<0.11.0" +torch = {version = ">=2.2.1,<3.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} +typer = ">=0.12.4,<0.13.0" +uvicorn = {version = ">=0.30.1,<0.31.0", optional = true, markers = "extra == \"server\""} +webdriver-manager = ">=4.0.2,<5.0.0" +wget = ">=3.2,<4.0" +yaspin = ">=3.0.2,<4.0.0" [package.extras] local = ["easyocr (>=1.7.1,<2.0.0)", "einops (>=0.8.0,<0.9.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "torch (>=2.2.1,<3.0.0)", "torchvision (>=0.18.0,<0.19.0)", "transformers (==4.41.2)"] -os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)"] +os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)", "torch (>=2.2.1,<3.0.0)"] safe = ["semgrep (>=1.52.0,<2.0.0)"] server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=0.30.1,<0.31.0)"] -[package.source] -type = "git" -url = "https://github.com/openinterpreter/open-interpreter.git" -reference = "development" -resolved_reference = "bd24acd89d3caf113a14109106952de2c793432f" - [[package]] name = "openai" version = "1.36.1" @@ -11080,4 +11078,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "7c40eca9d5b84b65894ddef258e1d7237d1698a62db1a68ee42871fe20d18f6f" +content-hash = "5532f1b0732f4c73e30db78b717afb0fd5cf327f1bf093f073cec113428cc4b9" diff --git a/software/pyproject.toml b/software/pyproject.toml index 391381d..ab65920 100644 --- a/software/pyproject.toml +++ b/software/pyproject.toml @@ -19,7 +19,7 @@ livekit-plugins-openai = "^0.10.1" livekit-plugins-silero = "^0.7.1" livekit-plugins-elevenlabs = "^0.7.5" segno = "^1.6.1" -open-interpreter = {git = "https://github.com/openinterpreter/open-interpreter.git", rev = "development", extras = ["os", "server"]} +open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package ngrok = "^1.4.0" realtimetts = {extras = ["all"], version = "^0.4.5"} realtimestt = "^0.2.41" diff --git a/software/source/server/livekit/worker.py b/software/source/server/livekit/worker.py index 67de8b0..5b76399 100644 --- a/software/source/server/livekit/worker.py +++ b/software/source/server/livekit/worker.py @@ -1,130 +1,21 @@ import asyncio import copy import os -import re from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli -from livekit.agents.transcription import STTSegmentsForwarder -from livekit.agents.llm import ChatContext, ChatMessage, LLMStream, ChatChunk, ChoiceDelta, Choice +from livekit.agents.llm import ChatContext, ChatMessage from livekit import rtc from livekit.agents.voice_assistant import VoiceAssistant from livekit.plugins import deepgram, openai, silero, elevenlabs from dotenv import load_dotenv import sys import numpy as np -from typing import AsyncIterator + load_dotenv() start_message = """Hi! You can hold the white circle below to speak to me. Try asking what I can do.""" -class ProcessedLLMStream(LLMStream): - def __init__( - self, - original_stream: LLMStream, - regex_pattern: str = r'', - ) -> None: - super().__init__(chat_ctx=original_stream.chat_ctx, fnc_ctx=original_stream.fnc_ctx) - self.original_stream = original_stream - self.regex_pattern = regex_pattern - self.init_match = '<.*?' # match for the '<' and any characters to the left of it - self.accumulating = False - self._aiter = self._process_stream() - self._buffer = "" - - - async def _process_stream(self) -> AsyncIterator[ChatChunk]: - async for chunk in self.original_stream: - new_choices = [] - for choice in chunk.choices: - content = choice.delta.content - - if content: - init_match = re.search(self.init_match, content) - if init_match: - print("INITIAL MATCH FOUND!!!!!!") - print("INITIAL MATCH FOUND!!!!!!") - print("INITIAL MATCH FOUND!!!!!!") - print("INITIAL MATCH FOUND!!!!!!") - print("INITIAL MATCH FOUND!!!!!!") - print("INITIAL MATCH FOUND!!!!!!") - print("INITIAL MATCH FOUND!!!!!!") - self.accumulating = True - if self.accumulating: - self._buffer += content - print("ACCUMULATING BUFFER!!!") - print("ACCUMULATING BUFFER!!!") - print("ACCUMULATING BUFFER!!!") - print("ACCUMULATING BUFFER!!!") - print("ACCUMULATING BUFFER!!!") - print("ACCUMULATING BUFFER!!!") - match = re.search(self.regex_pattern, self._buffer) - if match: - code = match.group(1) - print(f"Extracted Code: {code}") - - # Create a confirmation message - confirmation_msg = ChatMessage( - role="assistant", - content=f"Code extracted: {code}", - ) - - # Wrap the confirmation message in ChoiceDelta and Choice - choice_delta = ChoiceDelta( - role=confirmation_msg.role, - content=str(confirmation_msg.content) # we know confirmation_msg.content is a string - ) - new_choice = Choice( - delta=choice_delta, - index=choice.index - ) - - # Create a new ChatChunk with the confirmation Choice - confirmation_chunk = ChatChunk(choices=[new_choice]) - - # Yield the confirmation chunk - yield confirmation_chunk - self.accumulating = False - self._buffer = "" - continue # Skip yielding the original content - new_choices.append(choice) - if new_choices: - yield ChatChunk(choices=new_choices) - - async def __anext__(self) -> ChatChunk: - try: - return await self._aiter.__anext__() - except StopAsyncIteration: - await self.aclose() - raise - -def _01_synthesize_assistant_reply( - assistant: VoiceAssistant, chat_ctx: ChatContext -) -> LLMStream: - """ - Custom function to process the OpenAI compatible endpoint's output. - Extracts code from responses matching the pattern. - - Args: - assistant (VoiceAssistant): The VoiceAssistant instance. - chat_ctx (ChatContext): The current chat context. - - Returns: - LLMStream: The processed LLMStream. - """ - llm_stream = assistant.llm.chat(chat_ctx=chat_ctx, fnc_ctx=assistant.fnc_ctx) - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM") - - return ProcessedLLMStream(original_stream=llm_stream) - # This function is the entrypoint for the agent. async def entrypoint(ctx: JobContext): # Create an initial chat context with a system prompt @@ -205,7 +96,6 @@ async def entrypoint(ctx: JobContext): llm=open_interpreter, # Language Model tts=tts, # Text-to-Speech chat_ctx=initial_ctx, # Chat history context - # will_synthesize_assistant_reply=_01_synthesize_assistant_reply, ) chat = rtc.ChatManager(ctx.room) @@ -228,26 +118,13 @@ async def entrypoint(ctx: JobContext): await asyncio.sleep(1) - print("HELLO FROM INSIDE THE WORKER") - print("HELLO FROM INSIDE THE WORKER") - print("HELLO FROM INSIDE THE WORKER") - print("HELLO FROM INSIDE THE WORKER") - print("HELLO FROM INSIDE THE WORKER") - # Greets the user with an initial message await assistant.say(start_message, allow_interruptions=True) - stt_forwarder = STTSegmentsForwarder(room=ctx.room, participant=ctx.room.local_participant) - await stt_forwarder._run() - def main(livekit_url): - print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") - print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") - print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") - print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") - print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅") + # Workers have to be run as CLIs right now. # So we need to simualte running "[this file] dev" @@ -257,5 +134,5 @@ def main(livekit_url): # Initialize the worker with the entrypoint cli.run_app( - WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url, port=8082) + WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url) ) \ No newline at end of file