@@ -1,13 +1,47 @@
 #!/usr/bin/env python3
+"""
+AOP Framework Benchmarking Suite
+
+This comprehensive benchmarking suite tests the scaling laws of the AOP
+(Agent Orchestration Platform) framework by measuring latency, throughput,
+memory usage, and other performance metrics across different agent counts
+and configurations.
+
+Features:
+- Scaling law analysis (1 to 100+ agents)
+- Latency and throughput measurements
+- Memory usage profiling
+- Concurrent execution testing
+- Error rate analysis
+- Performance visualization with charts
+- Statistical analysis and reporting
+- Real agent testing with actual LLM calls
+
+Usage:
+1. Set your OpenAI API key: export OPENAI_API_KEY="your-key-here"
+2. Install required dependencies: pip install swarms
+3. Run the benchmark: python aop_benchmark.py
+4. Check results in the generated charts and reports
+
+Configuration:
+- Edit BENCHMARK_CONFIG at the top of the file to customize settings
+- Adjust model_name, max_agents, and other parameters as needed
+- This benchmark ONLY uses real agents with actual LLM calls
+
+Author: AI Assistant
+Date: 2024
+"""
 import gc
 import json
 import os
+import psutil
 import random
 import statistics
 import time
-import uuid
 import warnings
+import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from dataclasses import asdict, dataclass
+from dataclasses import dataclass, asdict
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Tuple
@@ -15,22 +49,14 @@ import matplotlib.pyplot as plt
 import numpy as np
 import openpyxl
 import pandas as pd
-import psutil
 import seaborn as sns
 from dotenv import load_dotenv
 from loguru import logger
 from openpyxl.styles import Font
 from openpyxl.utils.dataframe import dataframe_to_rows
-from swarms.structs.agent import Agent
 from swarms.structs.aop import AOP
-from swarms.utils.litellm_wrapper import LiteLLM
-
-# Suppress warnings for cleaner output
-warnings.filterwarnings("ignore")
-
-# Load environment variables
-load_dotenv()
 
 # Configuration
 BENCHMARK_CONFIG = {
     "models": [
@@ -60,6 +86,21 @@ BENCHMARK_CONFIG = {
" detailed_logging " : True , # Enable detailed logging
" detailed_logging " : True , # Enable detailed logging
}
}
# Suppress warnings for cleaner output
warnings . filterwarnings ( " ignore " )
# Load environment variables
load_dotenv ( )
# Import swarms Agent directly to avoid uvloop dependency
try :
from swarms . structs . agent import Agent
from swarms . utils . litellm_wrapper import LiteLLM
SWARMS_AVAILABLE = True
except ImportError :
SWARMS_AVAILABLE = False
@dataclass
@dataclass
class BenchmarkResult :
class BenchmarkResult :
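For context on the hunk above: wrapping the swarms imports in try/except turns a hard dependency into an optional one, probed once at import time. A minimal self-contained sketch of the pattern, with a hypothetical demo() caller standing in for the benchmark code that later consults the flag:

# Optional-import guard: record availability instead of failing at import time.
try:
    from swarms.structs.agent import Agent
    SWARMS_AVAILABLE = True
except ImportError:
    SWARMS_AVAILABLE = False


def demo() -> None:
    # Hypothetical caller: raise only when the dependency is actually needed,
    # not when this module is merely imported.
    if not SWARMS_AVAILABLE:
        raise ImportError("Swarms not available - install swarms: pip install swarms")
    print("swarms importable:", Agent)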
@@ -375,6 +416,12 @@ class AOPBenchmarkSuite:
" SWARMS_API_KEY or OPENAI_API_KEY environment variable is required for real agent testing "
" SWARMS_API_KEY or OPENAI_API_KEY environment variable is required for real agent testing "
)
)
# Check if swarms is available
if not SWARMS_AVAILABLE :
raise ImportError (
" Swarms not available - install swarms: pip install swarms "
)
# Create LiteLLM instance for the specific model
# Create LiteLLM instance for the specific model
llm = LiteLLM (
llm = LiteLLM (
model_name = model_name ,
model_name = model_name ,
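The hunk above ends mid-call, so only the model_name= keyword is confirmed by this diff. A hedged sketch of how the construction presumably continues inside the file; temperature and max_tokens are assumed parameter names and assumed BENCHMARK_CONFIG keys, not shown anywhere in the diff:

# Assumed continuation - only model_name= is visible in the hunk above.
llm = LiteLLM(
    model_name=model_name,
    temperature=BENCHMARK_CONFIG.get("temperature", 0.7),  # assumed key
    max_tokens=BENCHMARK_CONFIG.get("max_tokens", 1024),   # assumed key
)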
@@ -877,7 +924,7 @@ class AOPBenchmarkSuite:
             try:
                 if len(str(cell.value)) > max_length:
                     max_length = len(str(cell.value))
-            except:
+            except Exception:
                 pass
         adjusted_width = min(max_length + 2, 50)
         ws.column_dimensions[column_letter].width = adjusted_width
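The fix above replaces a bare except (which also swallows KeyboardInterrupt and SystemExit) with except Exception. For context, the column auto-width idiom it sits in, as a self-contained openpyxl sketch; the surrounding loop structure is inferred from the visible lines, and the sample rows are made up:

from openpyxl import Workbook
from openpyxl.utils import get_column_letter

wb = Workbook()
ws = wb.active
ws.append(["model", "avg_latency_s"])
ws.append(["gpt-4o-mini", 1.234])

for col_idx, column in enumerate(ws.columns, start=1):
    max_length = 0
    for cell in column:
        try:
            if len(str(cell.value)) > max_length:
                max_length = len(str(cell.value))
        except Exception:
            pass
    # Pad slightly, but cap the width so one long cell cannot blow up the layout.
    ws.column_dimensions[get_column_letter(col_idx)].width = min(max_length + 2, 50)
wb.save("sketch.xlsx")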
@@ -1616,7 +1663,6 @@ class AOPBenchmarkSuite:
             initial_memory = (
                 psutil.Process().memory_info().rss / 1024 / 1024
             )
-            psutil.cpu_percent()
 
             # Execute some tasks
             available_agents = aop.list_agents()
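The deleted psutil.cpu_percent() call discarded its return value, and a first call with no interval only primes psutil's internal counter anyway, so dropping it does not change what gets measured. The RSS reading that remains works like this minimal sketch (the payload list is a stand-in for agent creation):

import psutil


def rss_mib() -> float:
    # Resident set size of the current process, converted to MiB.
    return psutil.Process().memory_info().rss / 1024 / 1024


before = rss_mib()
payload = [object() for _ in range(1_000_000)]  # stand-in for creating agents
after = rss_mib()
print(f"memory growth: {after - before:.1f} MiB")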
@@ -2439,13 +2485,13 @@ class AOPBenchmarkSuite:
         ax2.grid(True, alpha=0.3)
 
         # Add value labels on bars
-        for i, (bar, time) in enumerate(
+        for i, (bar, exec_time) in enumerate(
             zip(bars2, df["avg_tool_execution_time"])
         ):
             ax2.text(
                 bar.get_x() + bar.get_width() / 2,
                 bar.get_height() + 0.01,
-                f"{time:.2f}s",
+                f"{exec_time:.2f}s",
                 ha="center",
                 va="bottom",
                 fontsize=8,
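Renaming the loop variable from time to exec_time stops it from shadowing the time module imported at the top of the file. The bar-labeling idiom itself, reduced to a runnable sketch with made-up numbers:

import matplotlib

matplotlib.use("Agg")  # render off-screen; no display required
import matplotlib.pyplot as plt

agent_counts = ["1", "5", "10"]
exec_times = [0.42, 0.51, 0.73]  # illustrative values only

fig, ax2 = plt.subplots()
bars2 = ax2.bar(agent_counts, exec_times)
ax2.grid(True, alpha=0.3)

# Center a value label just above each bar.
for bar, exec_time in zip(bars2, exec_times):
    ax2.text(
        bar.get_x() + bar.get_width() / 2,
        bar.get_height() + 0.01,
        f"{exec_time:.2f}s",
        ha="center",
        va="bottom",
        fontsize=8,
    )
fig.savefig("tool_execution_times.png")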
@@ -2905,6 +2951,33 @@ def main():
print ( f " Context Length: { BENCHMARK_CONFIG [ ' context_length ' ] } " )
print ( f " Context Length: { BENCHMARK_CONFIG [ ' context_length ' ] } " )
print ( )
print ( )
# Check for required environment variables
api_key = os . getenv ( " SWARMS_API_KEY " ) or os . getenv (
" OPENAI_API_KEY "
)
if not api_key :
print (
" ❌ Error: SWARMS_API_KEY or OPENAI_API_KEY not found in environment variables "
)
print (
" This benchmark requires real LLM calls for accurate performance testing "
)
print (
" Set your API key: export SWARMS_API_KEY= ' your-key-here ' or export OPENAI_API_KEY= ' your-key-here ' "
)
return 1
# Check for required imports
if not SWARMS_AVAILABLE :
print ( " ❌ Error: swarms not available " )
print (
" Install required dependencies: pip install swarms openpyxl "
)
print (
" This benchmark requires swarms framework and Excel support "
)
return 1
# Initialize benchmark suite
# Initialize benchmark suite
benchmark = AOPBenchmarkSuite (
benchmark = AOPBenchmarkSuite (
output_dir = " aop_benchmark_results " ,
output_dir = " aop_benchmark_results " ,
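Both pre-flight checks added to main() share the same fail-fast shape: detect the missing prerequisite, print an actionable message, and return a nonzero exit code before any expensive work starts. A condensed self-contained sketch of that shape (messages abbreviated; the real main() does much more):

import os
import sys

try:
    import swarms  # noqa: F401 - presence check only
    SWARMS_AVAILABLE = True
except ImportError:
    SWARMS_AVAILABLE = False


def preflight() -> int:
    # Either key is accepted; the benchmark needs only one working provider.
    api_key = os.getenv("SWARMS_API_KEY") or os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("Error: set SWARMS_API_KEY or OPENAI_API_KEY before running")
        return 1
    if not SWARMS_AVAILABLE:
        print("Error: swarms not importable - pip install swarms openpyxl")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(preflight())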