API CHANGES]

7 days ago · 3a1a614d7b
parent 33e7f69450
commit 3a1a614d7b
4 changed files with 635 additions and 151 deletions
--- a/Dockerfile.api
+++ b/Dockerfile.api
@ -0,0 +1,74 @@
 # Build stage
 FROM python:3.11-slim as builder
 # Set working directory
 WORKDIR /app
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*
 # Copy requirements from api folder
 COPY api/requirements.txt .
 RUN pip install --no-cache-dir wheel && \
    pip wheel --no-cache-dir --no-deps --wheel-dir /app/wheels -r requirements.txt
 # Final stage
 FROM python:3.11-slim
 # Set environment variables
 ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PATH="/app/venv/bin:$PATH" \
    PYTHONPATH=/app \
    PORT=8080
 # Create app user
 RUN useradd -m -s /bin/bash app && \
    mkdir -p /app/logs && \
    chown -R app:app /app
 # Set working directory
 WORKDIR /app
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*
 # Copy wheels from builder
 COPY --from=builder /app/wheels /app/wheels
 # Create and activate virtual environment
 RUN python -m venv /app/venv && \
    /app/venv/bin/pip install --no-cache-dir /app/wheels/*
 # Copy application code
 COPY --chown=app:app ./api ./api
 # Switch to app user
 USER app
 # Create directories for logs
 RUN mkdir -p /app/logs
 # Required environment variables
 ENV SUPABASE_URL="" \
    SUPABASE_SERVICE_KEY="" \
    ENVIRONMENT="production" \
    LOG_LEVEL="info" \
    WORKERS=4 \
    MAX_REQUESTS_PER_MINUTE=60 \
    API_KEY_LENGTH=32
 # Expose port
 EXPOSE $PORT
 # Health check
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:$PORT/health || exit 1
 # Start command
 CMD ["sh", "-c", "uvicorn api.api:app --host 0.0.0.0 --port $PORT --workers $WORKERS --log-level $LOG_LEVEL"]
--- a/api/main.py
+++ b/api/main.py
@ -1,16 +1,15 @@
 import asyncio
 import os
 import secrets
 import signal
 import traceback
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime, timedelta
 from enum import Enum
 from functools import lru_cache
 from pathlib import Path
 from typing import Any, AsyncGenerator, Dict, List, Optional
 from uuid import UUID, uuid4
 from fastapi.concurrency import asynccontextmanager
 import uvicorn
 from dotenv import load_dotenv
 from fastapi import (
@ -20,12 +19,14 @@ from fastapi import (
    Header,
    HTTPException,
    Query,
    Request,
    status,
 )
 from fastapi.concurrency import asynccontextmanager
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from loguru import logger
 from pydantic import BaseModel, Field
 from supabase import Client, create_client
 from swarms.structs.agent import Agent
@ -33,6 +34,90 @@ from swarms.structs.agent import Agent
 load_dotenv()
 class APIKey(BaseModel):
    """Model matching Supabase api_keys table"""
    id: UUID
    created_at: datetime
    name: str
    user_id: UUID
    key: str
    limit_credit_dollar: Optional[float] = None
    is_deleted: bool = False
 class User(BaseModel):
    id: UUID
    name: str
    is_active: bool = True
    is_admin: bool = False
@lru_cache()
 def get_supabase() -> Client:
    """Get cached Supabase client"""
    supabase_url = os.getenv("SUPABASE_URL")
    supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
    if not supabase_url or not supabase_key:
        raise ValueError("Supabase configuration is missing")
    return create_client(supabase_url, supabase_key)
 async def get_current_user(
    api_key: str = Header(..., description="API key for authentication"),
 ) -> User:
    """Validate API key against Supabase and return current user."""
    if not api_key or not api_key.startswith('sk-'):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid API key format",
            headers={"WWW-Authenticate": "ApiKey"},
        )
    try:
        supabase = get_supabase()
        # Query the api_keys table
        response = supabase.table('api_keys').select(
            'id, name, user_id, key, limit_credit_dollar, is_deleted'
        ).eq('key', api_key).single().execute()
        if not response.data:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid API key",
                headers={"WWW-Authenticate": "ApiKey"},
            )
        key_data = response.data
        # Check if key is deleted
        if key_data['is_deleted']:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="API key has been deleted",
                headers={"WWW-Authenticate": "ApiKey"},
            )
        # Check credit limit if applicable
        if key_data['limit_credit_dollar'] is not None and key_data['limit_credit_dollar'] <= 0:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="API key credit limit exceeded"
            )
        # Create user object
        return User(
            id=key_data['user_id'],
            name=key_data['name'],
            is_active=not key_data['is_deleted']
        )
    except Exception as e:
        logger.error(f"Error validating API key: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="API key validation failed",
            headers={"WWW-Authenticate": "ApiKey"},
        )
 class UvicornServer(uvicorn.Server):
    """Customized uvicorn server with graceful shutdown support"""
@ -59,14 +144,6 @@ class AgentStatus(str, Enum):
 API_KEY_LENGTH = 32  # Length of generated API keys
 class APIKey(BaseModel):
    key: str
    name: str
    created_at: datetime
    last_used: datetime
    is_active: bool = True
 class APIKeyCreate(BaseModel):
    name: str  # A friendly name for the API key
@ -214,11 +291,7 @@ class AgentStore:
    def __init__(self):
        self.agents: Dict[UUID, Agent] = {}
        self.agent_metadata: Dict[UUID, Dict[str, Any]] = {}
-        self.users: Dict[UUID, User] = {}  # user_id -> User
+        self.user_agents: Dict[UUID, List[UUID]] = {}  # user_id -> [agent_ids]
        self.api_keys: Dict[str, UUID] = {}  # api_key -> user_id
        self.user_agents: Dict[UUID, List[UUID]] = (
            {}
        )  # user_id -> [agent_ids]
        self.executor = ThreadPoolExecutor(max_workers=4)
        self._ensure_directories()
@ -227,31 +300,6 @@ class AgentStore:
        Path("logs").mkdir(exist_ok=True)
        Path("states").mkdir(exist_ok=True)
    def create_api_key(self, user_id: UUID, key_name: str) -> APIKey:
        """Create a new API key for a user."""
        if user_id not in self.users:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="User not found",
            )
        # Generate a secure random API key
        api_key = secrets.token_urlsafe(API_KEY_LENGTH)
        # Create the API key object
        key_object = APIKey(
            key=api_key,
            name=key_name,
            created_at=datetime.utcnow(),
            last_used=datetime.utcnow(),
        )
        # Store the API key
        self.users[user_id].api_keys[api_key] = key_object
        self.api_keys[api_key] = user_id
        return key_object
    async def verify_agent_access(
        self, agent_id: UUID, user_id: UUID
    ) -> bool:
@ -656,94 +704,11 @@ class SwarmsAPI:
        self._setup_routes()
    def _setup_routes(self):
-        """Set up API routes."""
+        """Set up API routes with Supabase authentication."""
        # In your API code
        # Modify the create_user endpoint
        @self.app.post("/v1/users", response_model=Dict[str, Any])
        async def create_user(request: Request):
            """Create a new user and initial API key."""
            try:
                body = await request.json()
                username = body.get("username")
                if not username or len(username) < 3:
                    raise HTTPException(
                        status_code=400, detail="Invalid username"
                    )
                user_id = uuid4()
                user = User(id=user_id, username=username)
                self.store.users[user_id] = user
                # Always create initial API key
                initial_key = self.store.create_api_key(
                    user_id, "Initial Key"
                )
                if not initial_key:
                    raise HTTPException(
                        status_code=500,
                        detail="Failed to create initial API key",
                    )
                return {
                    "user_id": user_id,
                    "api_key": initial_key.key,
                }
            except Exception as e:
                logger.error(f"Error creating user: {str(e)}")
                raise HTTPException(status_code=400, detail=str(e))
        @self.app.get(
            "/v1/users/{user_id}/api-keys",
            response_model=List[APIKey],
        )
        async def list_api_keys(
            user_id: UUID,
            current_user: User = Depends(get_current_user),
        ):
            """List all API keys for a user."""
            if (
                current_user.id != user_id
                and not current_user.is_admin
            ):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to view API keys for this user",
                )
            return list(self.store.users[user_id].api_keys.values())
        @self.app.delete("/v1/users/{user_id}/api-keys/{key}")
        async def revoke_api_key(
            user_id: UUID,
            key: str,
            current_user: User = Depends(get_current_user),
        ):
            """Revoke an API key."""
            if (
                current_user.id != user_id
                and not current_user.is_admin
            ):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to revoke API keys for this user",
                )
            if key in self.store.users[user_id].api_keys:
                self.store.users[user_id].api_keys[
                    key
                ].is_active = False
                del self.store.api_keys[key]
                return {"status": "API key revoked"}
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="API key not found",
            )
        @self.app.get(
-            "/v1/users/me/agents", response_model=List[AgentSummary]
+            "/v1/users/me/agents",
            response_model=List[AgentSummary]
        )
        async def list_user_agents(
            current_user: User = Depends(get_current_user),
@ -751,24 +716,20 @@ class SwarmsAPI:
            status: Optional[AgentStatus] = None,
        ):
            """List all agents owned by the current user."""
-            user_agents = self.store.user_agents.get(
+            user_agents = self.store.user_agents.get(current_user.id, [])
                current_user.id, []
            )
            return [
                agent
-                for agent in await self.store.list_agents(
+                for agent in await self.store.list_agents(tags, status)
                    tags, status
                )
                if agent.agent_id in user_agents
            ]
        # Modify existing routes to use API key authentication
        @self.app.post("/v1/agent", response_model=Dict[str, UUID])
        async def create_agent(
            config: AgentConfig,
            current_user: User = Depends(get_current_user),
        ):
            """Create a new agent with the specified configuration."""
            logger.info(f"User {current_user.id} creating new agent")
            agent_id = await self.store.create_agent(
                config, current_user.id
            )
@ -776,51 +737,115 @@ class SwarmsAPI:
        @self.app.get("/v1/agents", response_model=List[AgentSummary])
        async def list_agents(
            current_user: User = Depends(get_current_user),
            tags: Optional[List[str]] = Query(None),
            status: Optional[AgentStatus] = None,
        ):
            """List all agents, optionally filtered by tags and status."""
-            return await self.store.list_agents(tags, status)
+            agents = await self.store.list_agents(tags, status)
            # Filter agents based on user access
            return [
                agent for agent in agents
                if await self.store.verify_agent_access(agent.agent_id, current_user.id)
            ]
        @self.app.patch(
-            "/v1/agent/{agent_id}", response_model=Dict[str, str]
+            "/v1/agent/{agent_id}",
            response_model=Dict[str, str]
        )
-        async def update_agent(agent_id: UUID, update: AgentUpdate):
+        async def update_agent(
            agent_id: UUID,
            update: AgentUpdate,
            current_user: User = Depends(get_current_user)
        ):
            """Update an existing agent's configuration."""
            if not await self.store.verify_agent_access(agent_id, current_user.id):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to update this agent"
                )
            await self.store.update_agent(agent_id, update)
            return {"status": "updated"}
        @self.app.get(
            "/v1/agent/{agent_id}/metrics",
-            response_model=AgentMetrics,
+            response_model=AgentMetrics
        )
-        async def get_agent_metrics(agent_id: UUID):
+        async def get_agent_metrics(
            agent_id: UUID,
            current_user: User = Depends(get_current_user)
        ):
            """Get performance metrics for a specific agent."""
            if not await self.store.verify_agent_access(agent_id, current_user.id):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to view this agent's metrics"
                )
            return await self.store.get_agent_metrics(agent_id)
        @self.app.post(
            "/v1/agent/{agent_id}/clone",
-            response_model=Dict[str, UUID],
+            response_model=Dict[str, UUID]
        )
-        async def clone_agent(agent_id: UUID, new_name: str):
+        async def clone_agent(
            agent_id: UUID,
            new_name: str,
            current_user: User = Depends(get_current_user)
        ):
            """Clone an existing agent with a new name."""
            if not await self.store.verify_agent_access(agent_id, current_user.id):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to clone this agent"
                )
            new_id = await self.store.clone_agent(agent_id, new_name)
            # Add the cloned agent to user's agents
            if current_user.id not in self.store.user_agents:
                self.store.user_agents[current_user.id] = []
            self.store.user_agents[current_user.id].append(new_id)
            return {"agent_id": new_id}
        @self.app.delete("/v1/agent/{agent_id}")
-        async def delete_agent(agent_id: UUID):
+        async def delete_agent(
            agent_id: UUID,
            current_user: User = Depends(get_current_user)
        ):
            """Delete an agent."""
            if not await self.store.verify_agent_access(agent_id, current_user.id):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to delete this agent"
                )
            await self.store.delete_agent(agent_id)
            # Remove from user's agents list
            if current_user.id in self.store.user_agents:
                self.store.user_agents[current_user.id] = [
                    aid for aid in self.store.user_agents[current_user.id]
                    if aid != agent_id
                ]
            return {"status": "deleted"}
        @self.app.post(
-            "/v1/agent/completions", response_model=CompletionResponse
+            "/v1/agent/completions",
            response_model=CompletionResponse
        )
        async def create_completion(
            request: CompletionRequest,
            background_tasks: BackgroundTasks,
            current_user: User = Depends(get_current_user)
        ):
            """Process a completion request with the specified agent."""
            if not await self.store.verify_agent_access(request.agent_id, current_user.id):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to use this agent"
                )
            try:
                agent = await self.store.get_agent(request.agent_id)
@ -830,12 +855,13 @@ class SwarmsAPI:
                    request.prompt,
                    request.agent_id,
                    request.max_tokens,
-                    0.5,
+                    request.temperature_override
                )
                # Schedule background cleanup
                background_tasks.add_task(
-                    self._cleanup_old_metrics, request.agent_id
+                    self._cleanup_old_metrics,
                    request.agent_id
                )
                return response
@ -844,25 +870,54 @@ class SwarmsAPI:
                logger.error(f"Error processing completion: {str(e)}")
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                    detail=f"Error processing completion: {str(e)}",
+                    detail=f"Error processing completion: {str(e)}"
                )
        @self.app.get("/v1/agent/{agent_id}/status")
-        async def get_agent_status(agent_id: UUID):
+        async def get_agent_status(
            agent_id: UUID,
            current_user: User = Depends(get_current_user)
        ):
            """Get the current status of an agent."""
            if not await self.store.verify_agent_access(agent_id, current_user.id):
                raise HTTPException(
                    status_code=status.HTTP_403_FORBIDDEN,
                    detail="Not authorized to view this agent's status"
                )
            metadata = self.store.agent_metadata.get(agent_id)
            if not metadata:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
-                    detail=f"Agent {agent_id} not found",
+                    detail=f"Agent {agent_id} not found"
                )
            return {
                "agent_id": agent_id,
                "status": metadata["status"],
                "last_used": metadata["last_used"],
                "total_completions": metadata["total_completions"],
-                "error_count": metadata["error_count"],
+                "error_count": metadata["error_count"]
            }
        @self.app.get("/health")
        async def health_check():
            """Health check endpoint - no auth required."""
            try:
                # Test Supabase connection
                supabase = get_supabase()
                supabase.table('api_keys').select('count', count='exact').execute()
                return {"status": "healthy", "database": "connected"}
            except Exception as e:
                logger.error(f"Health check failed: {str(e)}")
                return JSONResponse(
                    status_code=503,
                    content={
                        "status": "unhealthy",
                        "database": "disconnected",
                        "error": str(e)
                    }
                )
    async def _cleanup_old_metrics(self, agent_id: UUID):
        """Clean up old metrics data to prevent memory bloat."""
@ -888,7 +943,7 @@ class SwarmsAPI:
 class APIServer:
    def __init__(
-        self, app: FastAPI, host: str = "0.0.0.0", port: int = 8000
+        self, app: FastAPI, host: str = "0.0.0.0", port: int = 8080
    ):
        self.app = app
        self.host = host
--- a/api/requirements.txt
+++ b/api/requirements.txt
@ -8,4 +8,6 @@ opentelemetry-api
 opentelemetry-sdk
 opentelemetry-instrumentation-fastapi
 opentelemetry-instrumentation-requests
-opentelemetry-exporter-otlp-proto-grpc
+opentelemetry-exporter-otlp-proto-grpc
 swarms
 supabase
--- a/voice.py
+++ b/voice.py
@ -0,0 +1,353 @@
 from __future__ import annotations
 import asyncio
 import base64
 import io
 import threading
 from os import getenv
 from typing import Any, Awaitable, Callable, cast
 import numpy as np
 try:
    import pyaudio
 except ImportError:
    import subprocess
    subprocess.check_call(["pip", "install", "pyaudio"])
    import pyaudio
 try:
    import sounddevice as sd
 except ImportError:
    import subprocess
    subprocess.check_call(["pip", "install", "sounddevice"])
    import sounddevice as sd
 from loguru import logger
 from openai import AsyncOpenAI
 from openai.resources.beta.realtime.realtime import (
    AsyncRealtimeConnection,
 )
 from openai.types.beta.realtime.session import Session
 try:
    from pydub import AudioSegment
 except ImportError:
    import subprocess
    subprocess.check_call(["pip", "install", "pydub"])
    from pydub import AudioSegment
 from dotenv import load_dotenv
 load_dotenv()
 CHUNK_LENGTH_S = 0.05  # 100ms
 SAMPLE_RATE = 24000
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
 # pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
 def audio_to_pcm16_base64(audio_bytes: bytes) -> bytes:
    # load the audio file from the byte stream
    audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
    print(f"Loaded audio: {audio.frame_rate=} {audio.channels=} {audio.sample_width=} {audio.frame_width=}")
    # resample to 24kHz mono pcm16
    pcm_audio = audio.set_frame_rate(SAMPLE_RATE).set_channels(CHANNELS).set_sample_width(2).raw_data
    return pcm_audio
 class AudioPlayerAsync:
    def __init__(self):
        self.queue = []
        self.lock = threading.Lock()
        self.stream = sd.OutputStream(
            callback=self.callback,
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype=np.int16,
            blocksize=int(CHUNK_LENGTH_S * SAMPLE_RATE),
        )
        self.playing = False
        self._frame_count = 0
    def callback(self, outdata, frames, time, status):  # noqa
        with self.lock:
            data = np.empty(0, dtype=np.int16)
            # get next item from queue if there is still space in the buffer
            while len(data) < frames and len(self.queue) > 0:
                item = self.queue.pop(0)
                frames_needed = frames - len(data)
                data = np.concatenate((data, item[:frames_needed]))
                if len(item) > frames_needed:
                    self.queue.insert(0, item[frames_needed:])
            self._frame_count += len(data)
            # fill the rest of the frames with zeros if there is no more data
            if len(data) < frames:
                data = np.concatenate((data, np.zeros(frames - len(data), dtype=np.int16)))
        outdata[:] = data.reshape(-1, 1)
    def reset_frame_count(self):
        self._frame_count = 0
    def get_frame_count(self):
        return self._frame_count
    def add_data(self, data: bytes):
        with self.lock:
            # bytes is pcm16 single channel audio data, convert to numpy array
            np_data = np.frombuffer(data, dtype=np.int16)
            self.queue.append(np_data)
            if not self.playing:
                self.start()
    def start(self):
        self.playing = True
        self.stream.start()
    def stop(self):
        self.playing = False
        self.stream.stop()
        with self.lock:
            self.queue = []
    def terminate(self):
        self.stream.close()
 async def send_audio_worker_sounddevice(
    connection: AsyncRealtimeConnection,
    should_send: Callable[[], bool] | None = None,
    start_send: Callable[[], Awaitable[None]] | None = None,
 ):
    sent_audio = False
    device_info = sd.query_devices()
    print(device_info)
    read_size = int(SAMPLE_RATE * 0.02)
    stream = sd.InputStream(
        channels=CHANNELS,
        samplerate=SAMPLE_RATE,
        dtype="int16",
    )
    stream.start()
    try:
        while True:
            if stream.read_available < read_size:
                await asyncio.sleep(0)
                continue
            data, _ = stream.read(read_size)
            if should_send() if should_send else True:
                if not sent_audio and start_send:
                    await start_send()
                await connection.send(
                    {"type": "input_audio_buffer.append", "audio": base64.b64encode(data).decode("utf-8")}
                )
                sent_audio = True
            elif sent_audio:
                print("Done, triggering inference")
                await connection.send({"type": "input_audio_buffer.commit"})
                await connection.send({"type": "response.create", "response": {}})
                sent_audio = False
            await asyncio.sleep(0)
    except KeyboardInterrupt:
        pass
    finally:
        stream.stop()
        stream.close()
 class RealtimeApp:
    """
    A console-based application to handle real-time audio recording and streaming,
    connecting to OpenAI's GPT-4 Realtime API.
    Features:
        - Streams microphone input to the GPT-4 Realtime API.
        - Logs transcription results.
        - Sends text prompts to the GPT-4 Realtime API.
    """
    def __init__(self, system_prompt: str = None) -> None:
        self.connection: AsyncRealtimeConnection | None = None
        self.session: Session | None = None
        self.client = AsyncOpenAI(api_key=getenv("OPENAI_API_KEY"))
        self.audio_player = AudioPlayerAsync()
        self.last_audio_item_id: str | None = None
        self.should_send_audio = asyncio.Event()
        self.connected = asyncio.Event()
        self.system_prompt = system_prompt
    async def initialize_text_prompt(self, text: str) -> None:
        """Initialize and send a text prompt to the OpenAI Realtime API."""
        try:
            async with self.client.beta.realtime.connect(model="gpt-4o-realtime-preview-2024-10-01") as conn:
                self.connection = conn
                await conn.session.update(session={"modalities": ["text"]})
                await conn.conversation.item.create(
                    item={
                        "type": "message",
                        "role": "system",
                        "content": [{"type": "input_text", "text": text}],
                    }
                )
                await conn.response.create()
                async for event in conn:
                    if event.type == "response.text.delta":
                        print(event.delta, flush=True, end="")
                    elif event.type == "response.text.done":
                        print()
                    elif event.type == "response.done":
                        break
        except Exception as e:
            logger.exception(f"Error initializing text prompt: {e}")
    async def handle_realtime_connection(self) -> None:
        """Handle the connection to the OpenAI Realtime API."""
        try:
            async with self.client.beta.realtime.connect(model="gpt-4o-realtime-preview-2024-10-01") as conn:
                self.connection = conn
                self.connected.set()
                logger.info("Connected to OpenAI Realtime API.")
                await conn.session.update(session={"turn_detection": {"type": "server_vad"}})
                acc_items: dict[str, Any] = {}
                async for event in conn:
                    if event.type == "session.created":
                        self.session = event.session
                        assert event.session.id is not None
                        logger.info(f"Session created with ID: {event.session.id}")
                        continue
                    if event.type == "session.updated":
                        self.session = event.session
                        logger.info("Session updated.")
                        continue
                    if event.type == "response.audio.delta":
                        if event.item_id != self.last_audio_item_id:
                            self.audio_player.reset_frame_count()
                            self.last_audio_item_id = event.item_id
                        bytes_data = base64.b64decode(event.delta)
                        self.audio_player.add_data(bytes_data)
                        continue
                    if event.type == "response.audio_transcript.delta":
                        try:
                            text = acc_items[event.item_id]
                        except KeyError:
                            acc_items[event.item_id] = event.delta
                        else:
                            acc_items[event.item_id] = text + event.delta
                        logger.debug(f"Transcription updated: {acc_items[event.item_id]}")
                        continue
                    if event.type == "response.text.delta":
                        print(event.delta, flush=True, end="")
                        continue
                    if event.type == "response.text.done":
                        print()
                        continue
                    if event.type == "response.done":
                        break
        except Exception as e:
            logger.exception(f"Error in realtime connection handler: {e}")
    async def _get_connection(self) -> AsyncRealtimeConnection:
        """Wait for and return the realtime connection."""
        await self.connected.wait()
        assert self.connection is not None
        return self.connection
    async def send_text_prompt(self, text: str) -> None:
        """Send a text prompt to the OpenAI Realtime API."""
        try:
            connection = await self._get_connection()
            if not self.session:
                logger.error("Session is not initialized. Cannot send prompt.")
                return
            logger.info(f"Sending prompt to the model: {text}")
            await connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": text}],
                }
            )
            await connection.response.create()
        except Exception as e:
            logger.exception(f"Error sending text prompt: {e}")
    async def send_mic_audio(self) -> None:
        """Stream microphone audio to the OpenAI Realtime API."""
        import sounddevice as sd  # type: ignore
        sent_audio = False
        try:
            read_size = int(SAMPLE_RATE * 0.02)
            stream = sd.InputStream(
                channels=CHANNELS, samplerate=SAMPLE_RATE, dtype="int16"
            )
            stream.start()
            while True:
                if stream.read_available < read_size:
                    await asyncio.sleep(0)
                    continue
                await self.should_send_audio.wait()
                data, _ = stream.read(read_size)
                connection = await self._get_connection()
                if not sent_audio:
                    asyncio.create_task(connection.send({"type": "response.cancel"}))
                    sent_audio = True
                await connection.input_audio_buffer.append(audio=base64.b64encode(cast(Any, data)).decode("utf-8"))
                await asyncio.sleep(0)
        except Exception as e:
            logger.exception(f"Error in microphone audio streaming: {e}")
        finally:
            stream.stop()
            stream.close()
    async def run(self) -> None:
        """Start the application tasks."""
        logger.info("Starting application tasks.")
        await asyncio.gather(
            # self.initialize_text_prompt(self.system_prompt),
            self.handle_realtime_connection(),
            self.send_mic_audio()
        )
 if __name__ == "__main__":
    logger.add("realtime_app.log", rotation="10 MB", retention="10 days", level="DEBUG")
    logger.info("Starting RealtimeApp.")
    app = RealtimeApp()
    asyncio.run(app.run())