swarms/examples/multi_agent/swarm_router/swarm_router_benchmark.py

#!/usr/bin/env python3
"""
SwarmRouter Performance Benchmark
This script benchmarks the performance improvements in SwarmRouter's _create_swarm method.
It compares the old O(n) elif chain vs the new O(1) factory pattern with caching.
"""
import time
import statistics
from typing import List, Dict, Any, Optional
from swarms.structs.swarm_router import SwarmRouter
from swarms.structs.agent import Agent


def create_mock_agents(num_agents: int = 3) -> List[Agent]:
    """Create mock agents for testing purposes."""
    agents = []
    for i in range(num_agents):
        # Create a simple mock agent
        agent = Agent(
            agent_name=f"TestAgent_{i}",
            system_prompt=f"You are test agent {i}",
            model_name="gpt-4o-mini",
            max_loops=1,
        )
        agents.append(agent)
    return agents


def benchmark_swarm_creation(
    swarm_types: List[str],
    num_iterations: int = 100,
    agents: Optional[List[Agent]] = None,
) -> Dict[str, Dict[str, Any]]:
    """
    Benchmark swarm creation performance for different swarm types.

    Args:
        swarm_types: List of swarm types to test.
        num_iterations: Number of iterations to run for each swarm type.
        agents: List of agents to use for testing.

    Returns:
        Dictionary containing performance metrics for each swarm type.
    """
    if agents is None:
        agents = create_mock_agents()

    results = {}
    for swarm_type in swarm_types:
        print(f"Benchmarking {swarm_type}...")
        times = []
        for i in range(num_iterations):
            # Create a fresh SwarmRouter instance for each test
            router = SwarmRouter(
                name=f"test-router-{i}",
                agents=agents,
                swarm_type=swarm_type,
                telemetry_enabled=False,
            )
            # Time the _create_swarm method
            start_time = time.perf_counter()
            try:
                router._create_swarm(task="test task")
                end_time = time.perf_counter()
                times.append(end_time - start_time)
            except Exception as e:
                print(f"Failed to create {swarm_type}: {e}")
                continue

        if times:
            results[swarm_type] = {
                "mean_time": statistics.mean(times),
                "median_time": statistics.median(times),
                "min_time": min(times),
                "max_time": max(times),
                "std_dev": (
                    statistics.stdev(times) if len(times) > 1 else 0
                ),
                "total_iterations": len(times),
            }
    return results
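
# Example usage, a minimal sketch (assumes the swarms package is installed and
# a model API key is configured for Agent construction):
#
#     stats = benchmark_swarm_creation(
#         swarm_types=["SequentialWorkflow"], num_iterations=5
#     )
#     print(stats["SequentialWorkflow"]["mean_time"])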


def benchmark_caching_performance(
    swarm_type: str = "SequentialWorkflow",
    num_iterations: int = 50,
    agents: Optional[List[Agent]] = None,
) -> Dict[str, Any]:
    """
    Benchmark the caching performance by creating the same swarm multiple times.

    Args:
        swarm_type: The swarm type to test.
        num_iterations: Number of iterations to run.
        agents: List of agents to use for testing.

    Returns:
        Dictionary containing caching performance metrics.
    """
    if agents is None:
        agents = create_mock_agents()

    print(f"Benchmarking caching performance for {swarm_type}...")
    router = SwarmRouter(
        name="cache-test-router",
        agents=agents,
        swarm_type=swarm_type,
        telemetry_enabled=False,
    )

    first_call_times = []
    cached_call_times = []
    for i in range(num_iterations):
        # Clear cache for first call timing
        router._swarm_cache.clear()

        # Time first call (cache miss)
        start_time = time.perf_counter()
        router._create_swarm(task="test task", iteration=i)
        end_time = time.perf_counter()
        first_call_times.append(end_time - start_time)

        # Time second call (cache hit)
        start_time = time.perf_counter()
        router._create_swarm(task="test task", iteration=i)
        end_time = time.perf_counter()
        cached_call_times.append(end_time - start_time)

    return {
        "first_call_mean": statistics.mean(first_call_times),
        "cached_call_mean": statistics.mean(cached_call_times),
        "speedup_factor": statistics.mean(first_call_times)
        / statistics.mean(cached_call_times),
        "cache_hit_ratio": 1.0,  # 100% cache hit rate in this test
    }
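
# Note: speedup_factor is mean(cache-miss time) / mean(cache-hit time), so a
# value of 20.0 would mean cached calls return twenty times faster than cold
# constructions. cache_hit_ratio is fixed at 1.0 because every second call in
# the loop above re-requests a swarm that was just built.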


def print_results(results: Dict[str, Dict[str, Any]]):
    """Print benchmark results in a formatted way."""
    print("\n" + "=" * 60)
    print("SWARM CREATION PERFORMANCE BENCHMARK RESULTS")
    print("=" * 60)
    for swarm_type, metrics in results.items():
        print(f"\n{swarm_type}:")
        print(f"  Mean time: {metrics['mean_time']:.6f} seconds")
        print(f"  Median time: {metrics['median_time']:.6f} seconds")
        print(f"  Min time: {metrics['min_time']:.6f} seconds")
        print(f"  Max time: {metrics['max_time']:.6f} seconds")
        print(f"  Std dev: {metrics['std_dev']:.6f} seconds")
        print(f"  Iterations: {metrics['total_iterations']}")


def print_caching_results(results: Dict[str, Any]):
    """Print caching benchmark results."""
    print("\n" + "=" * 60)
    print("CACHING PERFORMANCE BENCHMARK RESULTS")
    print("=" * 60)
    print(
        f"First call mean time: {results['first_call_mean']:.6f} seconds"
    )
    print(
        f"Cached call mean time: {results['cached_call_mean']:.6f} seconds"
    )
    print(f"Speedup factor: {results['speedup_factor']:.2f}x")
    print(f"Cache hit ratio: {results['cache_hit_ratio']:.1%}")


def main():
    """Run the complete benchmark suite."""
    print("SwarmRouter Performance Benchmark")
    print(
        "Testing O(1) factory pattern with caching vs O(n) elif chain"
    )
    print("-" * 60)

    # Create test agents
    agents = create_mock_agents(3)

    # Test different swarm types
    swarm_types = [
        "SequentialWorkflow",
        "ConcurrentWorkflow",
        "AgentRearrange",
        "MixtureOfAgents",
        "GroupChat",
        "MultiAgentRouter",
        "HeavySwarm",
        "MALT",
    ]

    # Run creation benchmark
    creation_results = benchmark_swarm_creation(
        swarm_types=swarm_types[:4],  # Test first 4 for speed
        num_iterations=20,
        agents=agents,
    )
    print_results(creation_results)

    # Run caching benchmark
    caching_results = benchmark_caching_performance(
        swarm_type="SequentialWorkflow",
        num_iterations=10,
        agents=agents,
    )
    print_caching_results(caching_results)

    # Calculate overall performance improvement
    if creation_results:
        avg_creation_time = statistics.mean(
            [
                metrics["mean_time"]
                for metrics in creation_results.values()
            ]
        )
        print("\n" + "=" * 60)
        print("PERFORMANCE SUMMARY")
        print("=" * 60)
        print(
            f"Average swarm creation time: {avg_creation_time:.6f} seconds"
        )
        print(
            "Factory pattern provides O(1) lookup vs O(n) elif chain"
        )
        print(
            f"Caching provides {caching_results['speedup_factor']:.2f}x speedup for repeated calls"
        )
        print("=" * 60)
if __name__ == "__main__":
main()