|
|
|
import asyncio
|
|
|
|
import copy
|
|
|
|
import os
|
|
|
|
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
|
|
|
|
from livekit.agents.llm import ChatContext, ChatMessage
|
|
|
|
from livekit import rtc
|
|
|
|
from livekit.agents.voice_assistant import VoiceAssistant
|
|
|
|
from livekit.plugins import deepgram, openai, silero, elevenlabs
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
# Load variables from a local .env file into the process environment at import
# time, so the os.getenv() lookups further down can see them.
load_dotenv()
|
|
|
|
|
|
|
|
# This function is the entrypoint for the agent.
|
|
|
|
async def entrypoint(ctx: JobContext):
    """Run the voice assistant for a single LiveKit job.

    Connects to the job's room (audio tracks only), builds a ``VoiceAssistant``
    whose language model is an OpenAI-compatible endpoint addressed through
    ``INTERPRETER_SERVER_HOST`` / ``INTERPRETER_LIGHT_SERVER_PORT``, answers
    text chat messages received in the room by speaking the model's reply, and
    finally greets the user.

    Args:
        ctx: Job context supplied by the worker; provides ``connect()`` and
            the ``room`` the assistant is attached to.
    """
    # Create an initial chat context with a system prompt
    initial_ctx = ChatContext().append(
        role="system",
        text=(
            "You are a voice assistant created by LiveKit. Your interface with users will be voice. "
            "You should use short and concise responses, and avoiding usage of unpronounceable punctuation."
        ),
    )

    # Connect to the LiveKit room
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    # NOTE(review): "0.0.0.0" is normally a bind address; as a connect target
    # it is not portable across platforms. Left unchanged here; confirm
    # whether the default should be "127.0.0.1" / "localhost".
    interpreter_server_host = os.getenv('INTERPRETER_SERVER_HOST', '0.0.0.0')
    interpreter_server_port = os.getenv('INTERPRETER_LIGHT_SERVER_PORT', '8000')
    base_url = f"http://{interpreter_server_host}:{interpreter_server_port}/openai"

    # The OpenAI plugin is pointed at the interpreter server instead of the
    # default OpenAI endpoint.
    open_interpreter = openai.LLM(
        model="open-interpreter", base_url=base_url
    )

    # VoiceAssistant is a class that creates a full conversational AI agent.
    # See https://github.com/livekit/agents/blob/main/livekit-agents/livekit/agents/voice_assistant/assistant.py
    # for details on how it works.
    assistant = VoiceAssistant(
        vad=silero.VAD.load(),  # Voice Activity Detection
        stt=deepgram.STT(),  # Speech-to-Text
        llm=open_interpreter,  # Language Model
        tts=elevenlabs.TTS(),  # Text-to-Speech
        chat_ctx=initial_ctx,  # Chat history context
    )

    chat = rtc.ChatManager(ctx.room)

    # Strong references to in-flight reply tasks. The event loop only keeps
    # weak references to tasks, so a task whose handle is dropped may be
    # garbage-collected before it finishes.
    pending_replies = set()

    async def _answer_from_text(text: str):
        """Speak the LLM's reply to a text chat message."""
        # Work on a copy so the assistant's own history is not mutated here.
        # NOTE(review): this reads a private attribute of VoiceAssistant;
        # confirm whether a public accessor is available in the pinned version.
        chat_ctx = copy.deepcopy(assistant._chat_ctx)
        chat_ctx.messages.append(ChatMessage(role="user", content=text))

        stream = open_interpreter.chat(chat_ctx=chat_ctx)
        await assistant.say(stream)

    @chat.on("message_received")
    def on_chat_received(msg: rtc.ChatMessage):
        """Schedule a spoken reply for each non-empty chat message."""
        if not msg.message:
            return
        # The handler is synchronous, so the coroutine is scheduled as a task;
        # the task is tracked until it completes, then dropped from the set.
        reply_task = asyncio.create_task(_answer_from_text(msg.message))
        pending_replies.add(reply_task)
        reply_task.add_done_callback(pending_replies.discard)

    # Start the voice assistant with the LiveKit room
    assistant.start(ctx.room)

    # Short pause before greeting (presumably to let the assistant finish
    # starting up; TODO confirm the required delay).
    await asyncio.sleep(1)

    # Greets the user with an initial message
    await assistant.say("Hey, how can I help you today?", allow_interruptions=True)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Initialize the worker with the entrypoint.
    # Credentials are read from the environment when present; the fallbacks
    # are the values that were previously hard-coded here, so behaviour is
    # unchanged when LIVEKIT_API_KEY / LIVEKIT_API_SECRET are not set.
    cli.run_app(
        WorkerOptions(
            entrypoint_fnc=entrypoint,
            api_key=os.getenv("LIVEKIT_API_KEY", "devkey"),
            api_secret=os.getenv("LIVEKIT_API_SECRET", "secret"),
            ws_url=os.getenv("LIVEKIT_URL"),
        )
    )
|