update instructions

pull/309/head
Ben Xu 3 months ago
parent 121e67e896
commit 9c145d0209

2188
software/poetry.lock generated

File diff suppressed because it is too large Load Diff

@ -21,7 +21,7 @@ async def entrypoint(ctx: JobContext):
openai_api_key = os.getenv("OPENAI_API_KEY") openai_api_key = os.getenv("OPENAI_API_KEY")
model = openai.realtime.RealtimeModel( model = openai.realtime.RealtimeModel(
instructions="You are a helpful assistant and you love kittens", instructions="You are a helpful assistant and you love open-source software",
voice="shimmer", voice="shimmer",
temperature=0.8, temperature=0.8,
modalities=["audio", "text"], modalities=["audio", "text"],

@ -2,20 +2,50 @@ import asyncio
import copy import copy
import os import os
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
from livekit.agents.transcription import STTSegmentsForwarder
from livekit.agents.llm import ChatContext, ChatMessage from livekit.agents.llm import ChatContext, ChatMessage
from livekit import rtc from livekit import rtc
from livekit.agents import stt, transcription
from livekit.agents.voice_assistant import VoiceAssistant from livekit.agents.voice_assistant import VoiceAssistant
from livekit.plugins import deepgram, openai, silero, elevenlabs from livekit.plugins import deepgram, openai, silero, elevenlabs
from dotenv import load_dotenv from dotenv import load_dotenv
import sys import sys
import numpy as np import numpy as np
from .text_processor import _01_synthesize_assistant_reply
from .video_processor import RemoteVideoProcessor
import logging
from datetime import datetime
load_dotenv() load_dotenv()
# Path of the plain-text log file. It is relative, so it resolves against the
# current working directory of the process.
LOG_FILE_PATH = 'worker.txt'


def log_message(message: str) -> None:
    """Append a message to the log file with a timestamp.

    Each call writes one line of the form
    ``YYYY-MM-DD HH:MM:SS - <message>`` to ``LOG_FILE_PATH``, creating the
    file if it does not exist yet. The timestamp is the naive local time
    returned by ``datetime.now()``.

    Args:
        message: Text to record on the log line.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Pin the encoding so non-ASCII messages are written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(LOG_FILE_PATH, 'a', encoding='utf-8') as log_file:
        log_file.write(f"{timestamp} - {message}\n")
start_message = """Hi! You can hold the white circle below to speak to me. start_message = """Hi! You can hold the white circle below to speak to me.
Try asking what I can do.""" Try asking what I can do."""
async def _forward_transcription(
    stt_stream: stt.SpeechStream,
    stt_forwarder: transcription.STTSegmentsForwarder,
):
    """Relay speech-to-text events to the client and echo them to stdout.

    Every event read from ``stt_stream`` is handed to ``stt_forwarder``.
    Interim transcripts are printed without a trailing newline; final
    transcripts are printed after a blank line, prefixed with `` -> ``.

    Args:
        stt_stream: Async-iterable stream of speech events.
        stt_forwarder: Forwarder that receives every event via ``update``.
    """
    async for event in stt_stream:
        # Forward first, regardless of the event type.
        stt_forwarder.update(event)

        if event.type == stt.SpeechEventType.INTERIM_TRANSCRIPT:
            # Partial result: keep writing on the current console line.
            print(event.alternatives[0].text, end="")
            continue

        if event.type == stt.SpeechEventType.FINAL_TRANSCRIPT:
            # Final result: break the line, then print the full transcript.
            print("\n")
            print(" -> ", event.alternatives[0].text)
# This function is the entrypoint for the agent. # This function is the entrypoint for the agent.
async def entrypoint(ctx: JobContext): async def entrypoint(ctx: JobContext):
# Create an initial chat context with a system prompt # Create an initial chat context with a system prompt
@ -27,7 +57,7 @@ async def entrypoint(ctx: JobContext):
) )
# Connect to the LiveKit room # Connect to the LiveKit room
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY) await ctx.connect()
# Create a black background with a white circle # Create a black background with a white circle
width, height = 640, 480 width, height = 640, 480
@ -96,6 +126,7 @@ async def entrypoint(ctx: JobContext):
llm=open_interpreter, # Language Model llm=open_interpreter, # Language Model
tts=tts, # Text-to-Speech tts=tts, # Text-to-Speech
chat_ctx=initial_ctx, # Chat history context chat_ctx=initial_ctx, # Chat history context
will_synthesize_assistant_reply=_01_synthesize_assistant_reply(ctx.room.local_participant),
) )
chat = rtc.ChatManager(ctx.room) chat = rtc.ChatManager(ctx.room)
@ -122,9 +153,41 @@ async def entrypoint(ctx: JobContext):
await assistant.say(start_message, await assistant.say(start_message,
allow_interruptions=True) allow_interruptions=True)
tasks = []
# Nested helper: pumps audio frames from one subscribed track into a
# speech-to-text stream while a background task forwards the resulting
# transcription events.
# NOTE(review): the leading "def main(livekit_url):" on the definition row
# below looks like the old-side column of the diff view fused into this row;
# only the "async def transcribe_track(...)" part belongs to this helper —
# confirm against the real file.
def main(livekit_url): async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track):
# Audio frames are read from the track through an rtc.AudioStream.
audio_stream = rtc.AudioStream(track)
# Forwarder bound to this room, participant and track; it is handed to
# _forward_transcription below.
stt_forwarder = STTSegmentsForwarder(
room=ctx.room, participant=participant, track=track
)
# NOTE(review): `stt` must resolve to an object exposing .stream() here; the
# only binding visible in this view is the `livekit.agents.stt` module import —
# confirm what `stt` refers to in the enclosing scope.
stt_stream = stt.stream()
# Consume transcription events concurrently with the frame loop below.
stt_task = asyncio.create_task(
_forward_transcription(stt_stream, stt_forwarder)
)
# Record the task in the enclosing `tasks` list so a reference to it is kept.
tasks.append(stt_task)
# Feed every frame produced by the audio stream into the STT stream.
async for ev in audio_stream:
stt_stream.push_frame(ev.frame)
@ctx.room.on("track_subscribed")
def on_track_subscribed(
    track: rtc.Track,
    publication: rtc.TrackPublication,
    participant: rtc.RemoteParticipant,
):
    """Start background processing for a newly subscribed remote track.

    The subscription is recorded with ``log_message``. Audio tracks are
    handed to ``transcribe_track``; video tracks are wrapped in an
    ``rtc.VideoStream`` and passed to a ``RemoteVideoProcessor``. Both kinds
    of work run as asyncio tasks stored in the enclosing ``tasks`` list.

    Args:
        track: The track that was subscribed.
        publication: Publication associated with the track (unused here).
        participant: Remote participant that owns the track.
    """
    log_message(f"Track subscribed: {track.kind}")

    if track.kind == rtc.TrackKind.KIND_AUDIO:
        tasks.append(asyncio.create_task(transcribe_track(participant, track)))
    elif track.kind == rtc.TrackKind.KIND_VIDEO:
        remote_video_stream = rtc.VideoStream(track=track)
        processor = RemoteVideoProcessor(video_stream=remote_video_stream, job_ctx=ctx)
        # Keep a strong reference, as the audio branch does: the event loop
        # only holds weak references to tasks, so an unreferenced task may be
        # garbage-collected before it finishes.
        tasks.append(asyncio.create_task(processor.process_frames()))
def main(livekit_url):
# Workers have to be run as CLIs right now. # Workers have to be run as CLIs right now.
# So we need to simulate running "[this file] dev" # So we need to simulate running "[this file] dev"

Loading…
Cancel
Save