process custom flags on worker

pull/309/head
Ben Xu 4 months ago
parent 197417a65b
commit 12efc95688

software/poetry.lock (generated)

@@ -4090,67 +4090,69 @@ sympy = "*"
[[package]]
name = "open-interpreter"
version = "0.3.12"
version = "0.3.13"
description = "Let language models run code"
optional = false
python-versions = "<4,>=3.9"
files = [
{file = "open_interpreter-0.3.12-py3-none-any.whl", hash = "sha256:0e155568685ea927c2b4b8139e4ce9bdf03ee0f8f5a71030f07988f72b305721"},
{file = "open_interpreter-0.3.12.tar.gz", hash = "sha256:ab3ebde071ab19812513880be3dedcccd7c545dbea1b8a31bd054ef6332acbf6"},
]
[package.dependencies]
astor = ">=0.8.1,<0.9.0"
fastapi = {version = ">=0.111.0,<0.112.0", optional = true, markers = "extra == \"server\""}
git-python = ">=1.0.3,<2.0.0"
google-generativeai = ">=0.7.1,<0.8.0"
html2image = ">=2.0.4.3,<3.0.0.0"
html2text = ">=2024.2.26,<2025.0.0"
inquirer = ">=3.1.3,<4.0.0"
ipykernel = ">=6.26.0,<7.0.0"
ipywidgets = {version = ">=8.1.2,<9.0.0", optional = true, markers = "extra == \"os\""}
janus = {version = ">=1.0.0,<2.0.0", optional = true, markers = "extra == \"server\""}
jupyter-client = ">=8.6.0,<9.0.0"
litellm = ">=1.41.26,<2.0.0"
matplotlib = ">=3.8.2,<4.0.0"
nltk = ">=3.8.1,<4.0.0"
opencv-python = {version = ">=4.8.1.78,<5.0.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""}
platformdirs = ">=4.2.0,<5.0.0"
plyer = {version = ">=2.1.0,<3.0.0", optional = true, markers = "extra == \"os\""}
psutil = ">=5.9.6,<6.0.0"
pyautogui = {version = ">=0.9.54,<0.10.0", optional = true, markers = "extra == \"os\""}
pydantic = ">=2.6.4,<3.0.0"
pyperclip = ">=1.9.0,<2.0.0"
pyreadline3 = {version = ">=3.4.1,<4.0.0", markers = "sys_platform == \"win32\""}
pytesseract = {version = ">=0.3.10,<0.4.0", optional = true, markers = "extra == \"os\" or extra == \"local\""}
pywinctl = {version = ">=0.3,<0.4", optional = true, markers = "extra == \"os\""}
pyyaml = ">=6.0.1,<7.0.0"
rich = ">=13.4.2,<14.0.0"
screeninfo = {version = ">=0.8.1,<0.9.0", optional = true, markers = "extra == \"os\""}
selenium = ">=4.24.0,<5.0.0"
send2trash = ">=1.8.2,<2.0.0"
sentence-transformers = {version = ">=2.5.1,<3.0.0", optional = true, markers = "extra == \"os\""}
python-versions = ">=3.9,<4"
files = []
develop = false
[package.dependencies]
astor = "^0.8.1"
fastapi = {version = "^0.111.0", optional = true}
git-python = "^1.0.3"
google-generativeai = "^0.7.1"
html2image = "^2.0.4.3"
html2text = "^2024.2.26"
inquirer = "^3.1.3"
ipykernel = "^6.26.0"
ipywidgets = {version = "^8.1.2", optional = true}
janus = {version = "^1.0.0", optional = true}
jupyter-client = "^8.6.0"
litellm = "^1.41.26"
matplotlib = "^3.8.2"
opencv-python = {version = "^4.8.1.78", optional = true}
platformdirs = "^4.2.0"
plyer = {version = "^2.1.0", optional = true}
psutil = "^5.9.6"
pyautogui = {version = "^0.9.54", optional = true}
pydantic = "^2.6.4"
pyperclip = "^1.9.0"
pyreadline3 = {version = "^3.4.1", markers = "sys_platform == \"win32\""}
pytesseract = {version = "^0.3.10", optional = true}
pywinctl = {version = "^0.3", optional = true}
pyyaml = "^6.0.1"
rich = "^13.4.2"
screeninfo = {version = "^0.8.1", optional = true}
selenium = "^4.24.0"
send2trash = "^1.8.2"
sentence-transformers = {version = "^2.5.1", optional = true}
setuptools = "*"
shortuuid = ">=1.0.13,<2.0.0"
six = ">=1.16.0,<2.0.0"
starlette = ">=0.37.2,<0.38.0"
tiktoken = ">=0.7.0,<0.8.0"
timm = {version = ">=0.9.16,<0.10.0", optional = true, markers = "extra == \"os\""}
tokentrim = ">=0.1.13,<0.2.0"
toml = ">=0.10.2,<0.11.0"
torch = {version = ">=2.2.1,<3.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""}
typer = ">=0.12.4,<0.13.0"
uvicorn = {version = ">=0.30.1,<0.31.0", optional = true, markers = "extra == \"server\""}
webdriver-manager = ">=4.0.2,<5.0.0"
wget = ">=3.2,<4.0"
yaspin = ">=3.0.2,<4.0.0"
shortuuid = "^1.0.13"
six = "^1.16.0"
starlette = "^0.37.2"
tiktoken = "^0.7.0"
timm = {version = "^0.9.16", optional = true}
tokentrim = "^0.1.13"
toml = "^0.10.2"
typer = "^0.12.4"
uvicorn = {version = "^0.30.1", optional = true}
webdriver-manager = "^4.0.2"
wget = "^3.2"
yaspin = "^3.0.2"
[package.extras]
local = ["easyocr (>=1.7.1,<2.0.0)", "einops (>=0.8.0,<0.9.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "torch (>=2.2.1,<3.0.0)", "torchvision (>=0.18.0,<0.19.0)", "transformers (==4.41.2)"]
os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)", "torch (>=2.2.1,<3.0.0)"]
os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)"]
safe = ["semgrep (>=1.52.0,<2.0.0)"]
server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=0.30.1,<0.31.0)"]
[package.source]
type = "git"
url = "https://github.com/openinterpreter/open-interpreter.git"
reference = "development"
resolved_reference = "bd24acd89d3caf113a14109106952de2c793432f"
[[package]]
name = "openai"
version = "1.36.1"
@@ -11078,4 +11080,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.12"
content-hash = "5532f1b0732f4c73e30db78b717afb0fd5cf327f1bf093f073cec113428cc4b9"
content-hash = "7c40eca9d5b84b65894ddef258e1d7237d1698a62db1a68ee42871fe20d18f6f"

@@ -19,7 +19,7 @@ livekit-plugins-openai = "^0.8.1"
livekit-plugins-silero = "^0.6.4"
livekit-plugins-elevenlabs = "^0.7.3"
segno = "^1.6.1"
open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package
open-interpreter = {git = "https://github.com/openinterpreter/open-interpreter.git", rev = "development", extras = ["os", "server"]}
ngrok = "^1.4.0"
realtimetts = {extras = ["all"], version = "^0.4.5"}
realtimestt = "^0.2.41"

@@ -1,21 +1,130 @@
import asyncio
import copy
import os
import re
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
from livekit.agents.llm import ChatContext, ChatMessage
from livekit.agents.transcription import STTSegmentsForwarder
from livekit.agents.llm import ChatContext, ChatMessage, LLMStream, ChatChunk, ChoiceDelta, Choice
from livekit import rtc
from livekit.agents.voice_assistant import VoiceAssistant
from livekit.plugins import deepgram, openai, silero, elevenlabs
from dotenv import load_dotenv
import sys
import numpy as np
from typing import AsyncIterator
load_dotenv()
start_message = """Hi! You can hold the white circle below to speak to me.
Try asking what I can do."""
class ProcessedLLMStream(LLMStream):
    def __init__(
        self,
        original_stream: LLMStream,
        regex_pattern: str = r'<unvoiced code="([^"]+)"></unvoiced>',
    ) -> None:
        super().__init__(chat_ctx=original_stream.chat_ctx, fnc_ctx=original_stream.fnc_ctx)
        self.original_stream = original_stream
        self.regex_pattern = regex_pattern
        self.init_match = '<.*?'  # matches the '<' and, lazily, any characters after it
        self.accumulating = False
        self._aiter = self._process_stream()
        self._buffer = ""
    async def _process_stream(self) -> AsyncIterator[ChatChunk]:
        async for chunk in self.original_stream:
            new_choices = []
            for choice in chunk.choices:
                content = choice.delta.content
                if content:
                    init_match = re.search(self.init_match, content)
                    if init_match:
                        print("INITIAL MATCH FOUND!!!!!!")
                        self.accumulating = True
                    if self.accumulating:
                        self._buffer += content
                        print("ACCUMULATING BUFFER!!!")
                        match = re.search(self.regex_pattern, self._buffer)
                        if match:
                            code = match.group(1)
                            print(f"Extracted Code: {code}")

                            # Create a confirmation message
                            confirmation_msg = ChatMessage(
                                role="assistant",
                                content=f"Code extracted: {code}",
                            )

                            # Wrap the confirmation message in ChoiceDelta and Choice
                            choice_delta = ChoiceDelta(
                                role=confirmation_msg.role,
                                content=str(confirmation_msg.content),  # confirmation_msg.content is a string
                            )
                            new_choice = Choice(
                                delta=choice_delta,
                                index=choice.index,
                            )

                            # Create a new ChatChunk with the confirmation Choice,
                            # then reset the accumulation state
                            confirmation_chunk = ChatChunk(choices=[new_choice])
                            yield confirmation_chunk
                            self.accumulating = False
                            self._buffer = ""
                        continue  # Skip yielding the original content
                new_choices.append(choice)
            if new_choices:
                yield ChatChunk(choices=new_choices)
    async def __anext__(self) -> ChatChunk:
        try:
            return await self._aiter.__anext__()
        except StopAsyncIteration:
            await self.aclose()
            raise
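
A minimal standalone sketch of the buffering logic above, assuming the tag can arrive split across streamed deltas (the chunk strings here are illustrative, not from the commit):

import re

pattern = r'<unvoiced code="([^"]+)"></unvoiced>'
buffer = ""
for delta in ['Sure. <unvoiced code="prin', 't(42)"></unvoiced> Done.']:
    buffer += delta  # the closing tag may not have arrived yet
    match = re.search(pattern, buffer)
    if match:
        print(match.group(1))  # -> print(42)
        buffer = ""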
def _01_synthesize_assistant_reply(
    assistant: VoiceAssistant, chat_ctx: ChatContext
) -> LLMStream:
    """
    Custom function to process the OpenAI-compatible endpoint's output.
    Extracts code from responses matching the <unvoiced code=...></unvoiced> pattern.

    Args:
        assistant (VoiceAssistant): The VoiceAssistant instance.
        chat_ctx (ChatContext): The current chat context.

    Returns:
        LLMStream: The processed LLMStream.
    """
    llm_stream = assistant.llm.chat(chat_ctx=chat_ctx, fnc_ctx=assistant.fnc_ctx)
    print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
    return ProcessedLLMStream(original_stream=llm_stream)
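
If the hook were re-enabled, the wiring would presumably mirror the commented-out keyword in the hunk below (a sketch reusing only the constructor arguments visible there; the VoiceAssistant's other arguments are omitted):

assistant = VoiceAssistant(
    llm=open_interpreter,  # Language Model
    tts=tts,  # Text-to-Speech
    chat_ctx=initial_ctx,  # Chat history context
    will_synthesize_assistant_reply=_01_synthesize_assistant_reply,
)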
# This function is the entrypoint for the agent.
async def entrypoint(ctx: JobContext):
    # Create an initial chat context with a system prompt
@@ -96,6 +205,7 @@ async def entrypoint(ctx: JobContext):
        llm=open_interpreter,  # Language Model
        tts=tts,  # Text-to-Speech
        chat_ctx=initial_ctx,  # Chat history context
        # will_synthesize_assistant_reply=_01_synthesize_assistant_reply,
    )

    chat = rtc.ChatManager(ctx.room)
@@ -118,13 +228,26 @@ async def entrypoint(ctx: JobContext):
    await asyncio.sleep(1)

    print("HELLO FROM INSIDE THE WORKER")

    # Greets the user with an initial message
    await assistant.say(start_message,
                        allow_interruptions=True)

    stt_forwarder = STTSegmentsForwarder(room=ctx.room, participant=ctx.room.local_participant)
    await stt_forwarder._run()
def main(livekit_url):
    print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
    # Workers have to be run as CLIs right now.
    # So we need to simulate running "[this file] dev"
@@ -134,5 +257,5 @@ def main(livekit_url):
    # Initialize the worker with the entrypoint
    cli.run_app(
        WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url)
        WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url, port=8082)
    )
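
The argv shim that main()'s comments refer to falls outside this hunk; a plausible reconstruction, assuming the LiveKit agents CLI's "dev" subcommand (an assumption, not the commit's actual lines):

# pretend the process was invoked as "python [this file] dev" so that
# cli.run_app() dispatches to the CLI's "dev" subcommand
sys.argv = [sys.argv[0], "dev"]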