revert anticipation to default

pull/314/head
Ben Xu 1 week ago
parent 16fb2b3023
commit 8c89960299

@ -3,7 +3,7 @@ import json
import base64 import base64
import traceback import traceback
import io import io
import os import re
from PIL import Image as PIL_Image from PIL import Image as PIL_Image
from openai import OpenAI from openai import OpenAI
@ -20,8 +20,11 @@ INSTRUCTIONS_PROMPT = """Given the conversation context and the current video fr
Rate the severity of violation from 0-10, where 10 is most severe. Rate the severity of violation from 0-10, where 10 is most severe.
Instructions to check: Instructions to check:
1. Ensure that the screenshot is NOT YOUTUBE or other video content 1. Ensure that there is no one in the frame.
"""
RESPONSE_FORMAT = """
Respond in the following JSON format: Respond in the following JSON format:
{ {
"violation_detected": boolean, "violation_detected": boolean,
@ -53,7 +56,7 @@ async def handle_instruction_check(
log_message(f"Violation detected with severity {result['severity_rating']}, triggering assistant response") log_message(f"Violation detected with severity {result['severity_rating']}, triggering assistant response")
# Append violation to chat context # Append violation to chat context
violation_text = f"For the given instructions: {INSTRUCTIONS_PROMPT}\n. Instruction violation frame detected: {result['violation_summary']}\nRecommendations: {result['recommendations']}" violation_text = f"Instruction violation frame detected: {result['violation_summary']}\nRecommendations: {result['recommendations']}"
assistant.chat_ctx.append( assistant.chat_ctx.append(
role="user", role="user",
text=violation_text text=violation_text
@ -75,12 +78,16 @@ async def handle_instruction_check(
# TODO: instead of saying the predetermined response, we'll trigger an assistant response here # TODO: instead of saying the predetermined response, we'll trigger an assistant response here
# we can append the current video frame that triggered the violation to the chat context # we can append the current video frame that triggered the violation to the chat context
stream = assistant.llm.chat( # NOTE: this currently produces an unexpected connection error:
chat_ctx=assistant.chat_ctx, # httpcore.ConnectError: All connection attempts failed
fnc_ctx=assistant.fnc_ctx,
) # stream = assistant.llm.chat(
# chat_ctx=assistant.chat_ctx,
# fnc_ctx=assistant.fnc_ctx,
# )
await assistant.say(stream) # we temporarily default back to saying the predetermined response
await assistant.say(violation_text)
else: else:
log_message("No significant violations detected or severity below threshold") log_message("No significant violations detected or severity below threshold")
except Exception as e: except Exception as e:
@ -93,15 +100,11 @@ async def check_instruction_violation(
chat_ctx: ChatContext, chat_ctx: ChatContext,
video_frame: rtc.VideoFrame, video_frame: rtc.VideoFrame,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Make a call to GPT-4 Vision to check for instruction violations""" """Makes a call to gpt-4o-mini to check for instruction violations"""
log_message("Creating new context for instruction check...") log_message("Creating new context for instruction check...")
try: try:
# pull this from env. client = OpenAI()
interpreter_server_host = os.getenv('INTERPRETER_SERVER_HOST', 'localhost')
interpreter_server_port = os.getenv('INTERPRETER_SERVER_PORT', '8000')
base_url = f"http://{interpreter_server_host}:{interpreter_server_port}/"
client = OpenAI(base_url)
try: try:
# Get raw RGBA data # Get raw RGBA data
@ -135,7 +138,7 @@ async def check_instruction_violation(
{ {
"role": "user", "role": "user",
"content": [ "content": [
{"type": "text", "text": INSTRUCTIONS_PROMPT}, {"type": "text", "text": INSTRUCTIONS_PROMPT + RESPONSE_FORMAT},
{ {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
@ -154,7 +157,12 @@ async def check_instruction_violation(
try: try:
# Parse the response content # Parse the response content
result = json.loads(response.choices[0].message.content) # Clean up the LLM response if it includes ```json ... ```
content = response.choices[0].message.content.strip()
content = re.sub(r'^```(?:json)?', '', content) # remove leading triple backticks and optional 'json'
content = re.sub(r'```$', '', content).strip() # remove trailing triple backticks
result = json.loads(content)
log_message(f"Successfully parsed LLM response: {json.dumps(result, indent=2)}") log_message(f"Successfully parsed LLM response: {json.dumps(result, indent=2)}")
return result return result
except Exception as e: except Exception as e:

Loading…
Cancel
Save