process custom flags on worker

pull/309/head
Ben Xu 4 months ago
parent 197417a65b
commit 12efc95688

108
software/poetry.lock generated

@ -4090,67 +4090,69 @@ sympy = "*"
[[package]] [[package]]
name = "open-interpreter" name = "open-interpreter"
version = "0.3.12" version = "0.3.13"
description = "Let language models run code" description = "Let language models run code"
optional = false optional = false
python-versions = "<4,>=3.9" python-versions = ">=3.9,<4"
files = [ files = []
{file = "open_interpreter-0.3.12-py3-none-any.whl", hash = "sha256:0e155568685ea927c2b4b8139e4ce9bdf03ee0f8f5a71030f07988f72b305721"}, develop = false
{file = "open_interpreter-0.3.12.tar.gz", hash = "sha256:ab3ebde071ab19812513880be3dedcccd7c545dbea1b8a31bd054ef6332acbf6"},
] [package.dependencies]
astor = "^0.8.1"
[package.dependencies] fastapi = {version = "^0.111.0", optional = true}
astor = ">=0.8.1,<0.9.0" git-python = "^1.0.3"
fastapi = {version = ">=0.111.0,<0.112.0", optional = true, markers = "extra == \"server\""} google-generativeai = "^0.7.1"
git-python = ">=1.0.3,<2.0.0" html2image = "^2.0.4.3"
google-generativeai = ">=0.7.1,<0.8.0" html2text = "^2024.2.26"
html2image = ">=2.0.4.3,<3.0.0.0" inquirer = "^3.1.3"
html2text = ">=2024.2.26,<2025.0.0" ipykernel = "^6.26.0"
inquirer = ">=3.1.3,<4.0.0" ipywidgets = {version = "^8.1.2", optional = true}
ipykernel = ">=6.26.0,<7.0.0" janus = {version = "^1.0.0", optional = true}
ipywidgets = {version = ">=8.1.2,<9.0.0", optional = true, markers = "extra == \"os\""} jupyter-client = "^8.6.0"
janus = {version = ">=1.0.0,<2.0.0", optional = true, markers = "extra == \"server\""} litellm = "^1.41.26"
jupyter-client = ">=8.6.0,<9.0.0" matplotlib = "^3.8.2"
litellm = ">=1.41.26,<2.0.0" opencv-python = {version = "^4.8.1.78", optional = true}
matplotlib = ">=3.8.2,<4.0.0" platformdirs = "^4.2.0"
nltk = ">=3.8.1,<4.0.0" plyer = {version = "^2.1.0", optional = true}
opencv-python = {version = ">=4.8.1.78,<5.0.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} psutil = "^5.9.6"
platformdirs = ">=4.2.0,<5.0.0" pyautogui = {version = "^0.9.54", optional = true}
plyer = {version = ">=2.1.0,<3.0.0", optional = true, markers = "extra == \"os\""} pydantic = "^2.6.4"
psutil = ">=5.9.6,<6.0.0" pyperclip = "^1.9.0"
pyautogui = {version = ">=0.9.54,<0.10.0", optional = true, markers = "extra == \"os\""} pyreadline3 = {version = "^3.4.1", markers = "sys_platform == \"win32\""}
pydantic = ">=2.6.4,<3.0.0" pytesseract = {version = "^0.3.10", optional = true}
pyperclip = ">=1.9.0,<2.0.0" pywinctl = {version = "^0.3", optional = true}
pyreadline3 = {version = ">=3.4.1,<4.0.0", markers = "sys_platform == \"win32\""} pyyaml = "^6.0.1"
pytesseract = {version = ">=0.3.10,<0.4.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} rich = "^13.4.2"
pywinctl = {version = ">=0.3,<0.4", optional = true, markers = "extra == \"os\""} screeninfo = {version = "^0.8.1", optional = true}
pyyaml = ">=6.0.1,<7.0.0" selenium = "^4.24.0"
rich = ">=13.4.2,<14.0.0" send2trash = "^1.8.2"
screeninfo = {version = ">=0.8.1,<0.9.0", optional = true, markers = "extra == \"os\""} sentence-transformers = {version = "^2.5.1", optional = true}
selenium = ">=4.24.0,<5.0.0"
send2trash = ">=1.8.2,<2.0.0"
sentence-transformers = {version = ">=2.5.1,<3.0.0", optional = true, markers = "extra == \"os\""}
setuptools = "*" setuptools = "*"
shortuuid = ">=1.0.13,<2.0.0" shortuuid = "^1.0.13"
six = ">=1.16.0,<2.0.0" six = "^1.16.0"
starlette = ">=0.37.2,<0.38.0" starlette = "^0.37.2"
tiktoken = ">=0.7.0,<0.8.0" tiktoken = "^0.7.0"
timm = {version = ">=0.9.16,<0.10.0", optional = true, markers = "extra == \"os\""} timm = {version = "^0.9.16", optional = true}
tokentrim = ">=0.1.13,<0.2.0" tokentrim = "^0.1.13"
toml = ">=0.10.2,<0.11.0" toml = "^0.10.2"
torch = {version = ">=2.2.1,<3.0.0", optional = true, markers = "extra == \"os\" or extra == \"local\""} typer = "^0.12.4"
typer = ">=0.12.4,<0.13.0" uvicorn = {version = "^0.30.1", optional = true}
uvicorn = {version = ">=0.30.1,<0.31.0", optional = true, markers = "extra == \"server\""} webdriver-manager = "^4.0.2"
webdriver-manager = ">=4.0.2,<5.0.0" wget = "^3.2"
wget = ">=3.2,<4.0" yaspin = "^3.0.2"
yaspin = ">=3.0.2,<4.0.0"
[package.extras] [package.extras]
local = ["easyocr (>=1.7.1,<2.0.0)", "einops (>=0.8.0,<0.9.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "torch (>=2.2.1,<3.0.0)", "torchvision (>=0.18.0,<0.19.0)", "transformers (==4.41.2)"] local = ["easyocr (>=1.7.1,<2.0.0)", "einops (>=0.8.0,<0.9.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "torch (>=2.2.1,<3.0.0)", "torchvision (>=0.18.0,<0.19.0)", "transformers (==4.41.2)"]
os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)", "torch (>=2.2.1,<3.0.0)"] os = ["ipywidgets (>=8.1.2,<9.0.0)", "opencv-python (>=4.8.1.78,<5.0.0.0)", "plyer (>=2.1.0,<3.0.0)", "pyautogui (>=0.9.54,<0.10.0)", "pytesseract (>=0.3.10,<0.4.0)", "pywinctl (>=0.3,<0.4)", "screeninfo (>=0.8.1,<0.9.0)", "sentence-transformers (>=2.5.1,<3.0.0)", "timm (>=0.9.16,<0.10.0)"]
safe = ["semgrep (>=1.52.0,<2.0.0)"] safe = ["semgrep (>=1.52.0,<2.0.0)"]
server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=0.30.1,<0.31.0)"] server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=0.30.1,<0.31.0)"]
[package.source]
type = "git"
url = "https://github.com/openinterpreter/open-interpreter.git"
reference = "development"
resolved_reference = "bd24acd89d3caf113a14109106952de2c793432f"
[[package]] [[package]]
name = "openai" name = "openai"
version = "1.36.1" version = "1.36.1"
@ -11078,4 +11080,4 @@ type = ["pytest-mypy"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.10,<3.12" python-versions = ">=3.10,<3.12"
content-hash = "5532f1b0732f4c73e30db78b717afb0fd5cf327f1bf093f073cec113428cc4b9" content-hash = "7c40eca9d5b84b65894ddef258e1d7237d1698a62db1a68ee42871fe20d18f6f"

@ -19,7 +19,7 @@ livekit-plugins-openai = "^0.8.1"
livekit-plugins-silero = "^0.6.4" livekit-plugins-silero = "^0.6.4"
livekit-plugins-elevenlabs = "^0.7.3" livekit-plugins-elevenlabs = "^0.7.3"
segno = "^1.6.1" segno = "^1.6.1"
open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package open-interpreter = {git = "https://github.com/openinterpreter/open-interpreter.git", rev = "development", extras = ["os", "server"]}
ngrok = "^1.4.0" ngrok = "^1.4.0"
realtimetts = {extras = ["all"], version = "^0.4.5"} realtimetts = {extras = ["all"], version = "^0.4.5"}
realtimestt = "^0.2.41" realtimestt = "^0.2.41"

@ -1,21 +1,130 @@
import asyncio import asyncio
import copy import copy
import os import os
import re
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
from livekit.agents.llm import ChatContext, ChatMessage from livekit.agents.transcription import STTSegmentsForwarder
from livekit.agents.llm import ChatContext, ChatMessage, LLMStream, ChatChunk, ChoiceDelta, Choice
from livekit import rtc from livekit import rtc
from livekit.agents.voice_assistant import VoiceAssistant from livekit.agents.voice_assistant import VoiceAssistant
from livekit.plugins import deepgram, openai, silero, elevenlabs from livekit.plugins import deepgram, openai, silero, elevenlabs
from dotenv import load_dotenv from dotenv import load_dotenv
import sys import sys
import numpy as np import numpy as np
from typing import AsyncIterator
load_dotenv() load_dotenv()
start_message = """Hi! You can hold the white circle below to speak to me. start_message = """Hi! You can hold the white circle below to speak to me.
Try asking what I can do.""" Try asking what I can do."""
class ProcessedLLMStream(LLMStream):
def __init__(
self,
original_stream: LLMStream,
regex_pattern: str = r'<unvoiced code="([^"]+)"></unvoiced>',
) -> None:
super().__init__(chat_ctx=original_stream.chat_ctx, fnc_ctx=original_stream.fnc_ctx)
self.original_stream = original_stream
self.regex_pattern = regex_pattern
self.init_match = '<.*?' # match for the '<' and any characters to the left of it
self.accumulating = False
self._aiter = self._process_stream()
self._buffer = ""
async def _process_stream(self) -> AsyncIterator[ChatChunk]:
async for chunk in self.original_stream:
new_choices = []
for choice in chunk.choices:
content = choice.delta.content
if content:
init_match = re.search(self.init_match, content)
if init_match:
print("INITIAL MATCH FOUND!!!!!!")
print("INITIAL MATCH FOUND!!!!!!")
print("INITIAL MATCH FOUND!!!!!!")
print("INITIAL MATCH FOUND!!!!!!")
print("INITIAL MATCH FOUND!!!!!!")
print("INITIAL MATCH FOUND!!!!!!")
print("INITIAL MATCH FOUND!!!!!!")
self.accumulating = True
if self.accumulating:
self._buffer += content
print("ACCUMULATING BUFFER!!!")
print("ACCUMULATING BUFFER!!!")
print("ACCUMULATING BUFFER!!!")
print("ACCUMULATING BUFFER!!!")
print("ACCUMULATING BUFFER!!!")
print("ACCUMULATING BUFFER!!!")
match = re.search(self.regex_pattern, self._buffer)
if match:
code = match.group(1)
print(f"Extracted Code: {code}")
# Create a confirmation message
confirmation_msg = ChatMessage(
role="assistant",
content=f"Code extracted: {code}",
)
# Wrap the confirmation message in ChoiceDelta and Choice
choice_delta = ChoiceDelta(
role=confirmation_msg.role,
content=str(confirmation_msg.content) # we know confirmation_msg.content is a string
)
new_choice = Choice(
delta=choice_delta,
index=choice.index
)
# Create a new ChatChunk with the confirmation Choice
confirmation_chunk = ChatChunk(choices=[new_choice])
# Yield the confirmation chunk
yield confirmation_chunk
self.accumulating = False
self._buffer = ""
continue # Skip yielding the original content
new_choices.append(choice)
if new_choices:
yield ChatChunk(choices=new_choices)
async def __anext__(self) -> ChatChunk:
try:
return await self._aiter.__anext__()
except StopAsyncIteration:
await self.aclose()
raise
def _01_synthesize_assistant_reply(
assistant: VoiceAssistant, chat_ctx: ChatContext
) -> LLMStream:
"""
Custom function to process the OpenAI compatible endpoint's output.
Extracts code from responses matching the <unvoiced code=...></unvoiced> pattern.
Args:
assistant (VoiceAssistant): The VoiceAssistant instance.
chat_ctx (ChatContext): The current chat context.
Returns:
LLMStream: The processed LLMStream.
"""
llm_stream = assistant.llm.chat(chat_ctx=chat_ctx, fnc_ctx=assistant.fnc_ctx)
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
print("HELLO FROM INSIDE OUR CUSTOM LLM STREAM")
return ProcessedLLMStream(original_stream=llm_stream)
# This function is the entrypoint for the agent. # This function is the entrypoint for the agent.
async def entrypoint(ctx: JobContext): async def entrypoint(ctx: JobContext):
# Create an initial chat context with a system prompt # Create an initial chat context with a system prompt
@ -96,6 +205,7 @@ async def entrypoint(ctx: JobContext):
llm=open_interpreter, # Language Model llm=open_interpreter, # Language Model
tts=tts, # Text-to-Speech tts=tts, # Text-to-Speech
chat_ctx=initial_ctx, # Chat history context chat_ctx=initial_ctx, # Chat history context
# will_synthesize_assistant_reply=_01_synthesize_assistant_reply,
) )
chat = rtc.ChatManager(ctx.room) chat = rtc.ChatManager(ctx.room)
@ -118,13 +228,26 @@ async def entrypoint(ctx: JobContext):
await asyncio.sleep(1) await asyncio.sleep(1)
print("HELLO FROM INSIDE THE WORKER")
print("HELLO FROM INSIDE THE WORKER")
print("HELLO FROM INSIDE THE WORKER")
print("HELLO FROM INSIDE THE WORKER")
print("HELLO FROM INSIDE THE WORKER")
# Greets the user with an initial message # Greets the user with an initial message
await assistant.say(start_message, await assistant.say(start_message,
allow_interruptions=True) allow_interruptions=True)
stt_forwarder = STTSegmentsForwarder(room=ctx.room, participant=ctx.room.local_participant)
await stt_forwarder._run()
def main(livekit_url): def main(livekit_url):
print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
print("Starting worker!!!!!!! 🦅🦅🦅🦅🦅🦅")
# Workers have to be run as CLIs right now. # Workers have to be run as CLIs right now.
# So we need to simualte running "[this file] dev" # So we need to simualte running "[this file] dev"
@ -134,5 +257,5 @@ def main(livekit_url):
# Initialize the worker with the entrypoint # Initialize the worker with the entrypoint
cli.run_app( cli.run_app(
WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url) WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url, port=8082)
) )
Loading…
Cancel
Save