@ -11,38 +11,110 @@ from livekit.agents.multimodal import MultimodalAgent
from livekit . plugins import openai
from livekit . plugins import openai
from dotenv import load_dotenv
from dotenv import load_dotenv
import os
import os
import time
from typing import Annotated
from livekit . agents import llm
# Set the environment variable
os . environ [ ' INTERPRETER_TERMINAL_INPUT_PATIENCE ' ] = ' 200000 '
instructions = """
You are Open Interpreter , a world - class programmer that can complete any goal by executing code .
For advanced requests , start by writing a plan .
When you execute code , it will be executed * * on the user ' s machine** in a stateful Jupyter notebook. The user has given you **full permission** to execute any code necessary to complete the task. Execute the code. You CAN run code on the users machine, using the tool you have access to.
You can access the internet . Run * * any code * * to achieve the goal , and if at first you don ' t succeed, try again and again.
You can install new packages .
If you modify or create a file , YOU MUST THEN OPEN IT to display it to the user .
Be concise . Do NOT send the user a markdown version of your code — just execute the code instantly . Execute the code !
You are capable of * * any * * task .
You MUST remember to pass into the execute_code function a correct JSON input like { " code " : " print( ' hello world ' ) " } and NOT a raw string or something else .
"""
load_dotenv ( )
load_dotenv ( )
async def entrypoint ( ctx : JobContext ) :
async def entrypoint ( ctx : JobContext ) :
from interpreter import interpreter
def execute_code ( code ) :
print ( " --- code --- " )
print ( code )
print ( " --- " )
#time.sleep(2)
# Check if the code contains any file deletion commands
if any ( keyword in code . lower ( ) for keyword in [ ' os.remove ' , ' os.unlink ' , ' shutil.rmtree ' , ' delete file ' , ' rm - ' ] ) :
print ( " Warning: File deletion commands detected. Execution aborted for safety. " )
return " Execution aborted: File deletion commands are not allowed. "
print ( " --- output --- " )
output = " "
for chunk in interpreter . computer . run ( " python " , code ) :
if " content " in chunk and type ( chunk [ " content " ] ) == str :
output + = " \n " + chunk [ " content " ]
print ( chunk [ " content " ] )
print ( " --- " )
output = output . strip ( )
if output == " " :
output = " No output was produced by running this code. "
return output
# first define a class that inherits from llm.FunctionContext
class AssistantFnc ( llm . FunctionContext ) :
# the llm.ai_callable decorator marks this function as a tool available to the LLM
# by default, it'll use the docstring as the function's description
@llm.ai_callable ( )
async def execute (
self ,
# by using the Annotated type, arg description and type are available to the LLM
code : Annotated [
str , llm . TypeInfo ( description = " The Python code to execute " )
] ,
) :
""" Executes Python and returns the output """
return execute_code ( code )
fnc_ctx = AssistantFnc ( )
await ctx . connect ( auto_subscribe = AutoSubscribe . AUDIO_ONLY )
await ctx . connect ( auto_subscribe = AutoSubscribe . AUDIO_ONLY )
participant = await ctx . wait_for_participant ( )
participant = await ctx . wait_for_participant ( )
openai_api_key = os . getenv ( " OPENAI_API_KEY " )
openai_api_key = os . getenv ( " OPENAI_API_KEY " )
model = openai . realtime . RealtimeModel (
model = openai . realtime . RealtimeModel (
instructions = " You are a helpful assistant and you love open-source software " ,
instructions = instructions ,
voice = " shimmer " ,
voice = " shimmer " ,
temperature = 0.8 ,
temperature = 0. 6 ,
modalities = [ " audio " , " text " ] ,
modalities = [ " audio " , " text " ] ,
api_key = openai_api_key ,
api_key = openai_api_key ,
base_url = " wss://api.openai.com/v1 " ,
base_url = " wss://api.openai.com/v1 " ,
)
)
assistant = MultimodalAgent ( model = model )
model . _fnc_ctx = fnc_ctx
assistant = MultimodalAgent ( model = model , fnc_ctx = fnc_ctx )
assistant . start ( ctx . room )
assistant . start ( ctx . room )
session = model . sessions [ 0 ]
# Create a session with the function context
session = model . session (
chat_ctx = llm . ChatContext ( ) ,
fnc_ctx = fnc_ctx ,
)
# Initial message to start the interaction
session . conversation . item . create (
session . conversation . item . create (
llm . ChatMessage (
llm . ChatMessage (
role = " user " ,
role = " user " ,
content = " Please begin the interaction with the user in a manner consistent with your instructions. " ,
content = " Hello! " ,
)
)
)
)
session . response . create ( )
session . response . create ( )
def main ( livekit_url ) :
def main ( livekit_url ) :
# Workers have to be run as CLIs right now.
# Workers have to be run as CLIs right now.
# So we need to simualte running "[this file] dev"
# So we need to simu la te running "[this file] dev"
# Modify sys.argv to set the path to this file as the first argument
# Modify sys.argv to set the path to this file as the first argument
# and 'dev' as the second argument
# and 'dev' as the second argument