|
|
import sys
|
|
|
|
|
|
from loguru import logger
|
|
|
|
|
|
from swarms.agents.reasoning_agents import (
|
|
|
ReasoningAgentInitializationError,
|
|
|
ReasoningAgentRouter,
|
|
|
)
|
|
|
|
|
|
|
|
|
def test_router_initialization():
|
|
|
"""
|
|
|
Test ReasoningAgentRouter initialization with various configurations.
|
|
|
|
|
|
Tests:
|
|
|
- Default initialization
|
|
|
- Custom parameter initialization
|
|
|
- All agent types initialization
|
|
|
"""
|
|
|
logger.info("Starting router initialization tests...")
|
|
|
|
|
|
# Test 1: Default initialization
|
|
|
logger.info("Test 1: Default initialization")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter()
|
|
|
assert router is not None, "Default router should not be None"
|
|
|
assert (
|
|
|
router.agent_name == "reasoning_agent"
|
|
|
), f"Expected 'reasoning_agent', got {router.agent_name}"
|
|
|
assert (
|
|
|
router.swarm_type == "reasoning-duo"
|
|
|
), f"Expected 'reasoning-duo', got {router.swarm_type}"
|
|
|
assert (
|
|
|
router.model_name == "gpt-4o-mini"
|
|
|
), f"Expected 'gpt-4o-mini', got {router.model_name}"
|
|
|
logger.success("✓ Default initialization test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ Default initialization test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 2: Custom parameters initialization
|
|
|
logger.info("Test 2: Custom parameters initialization")
|
|
|
try:
|
|
|
custom_router = ReasoningAgentRouter(
|
|
|
agent_name="test_agent",
|
|
|
description="Test agent for unit testing",
|
|
|
model_name="gpt-4",
|
|
|
system_prompt="You are a test agent.",
|
|
|
max_loops=5,
|
|
|
swarm_type="self-consistency",
|
|
|
num_samples=3,
|
|
|
output_type="dict-all-except-first",
|
|
|
num_knowledge_items=10,
|
|
|
memory_capacity=20,
|
|
|
eval=True,
|
|
|
random_models_on=True,
|
|
|
majority_voting_prompt="Custom voting prompt",
|
|
|
reasoning_model_name="claude-3-5-sonnet-20240620",
|
|
|
)
|
|
|
assert (
|
|
|
custom_router is not None
|
|
|
), "Custom router should not be None"
|
|
|
assert (
|
|
|
custom_router.agent_name == "test_agent"
|
|
|
), f"Expected 'test_agent', got {custom_router.agent_name}"
|
|
|
assert (
|
|
|
custom_router.swarm_type == "self-consistency"
|
|
|
), f"Expected 'self-consistency', got {custom_router.swarm_type}"
|
|
|
assert (
|
|
|
custom_router.max_loops == 5
|
|
|
), f"Expected 5, got {custom_router.max_loops}"
|
|
|
assert (
|
|
|
custom_router.num_samples == 3
|
|
|
), f"Expected 3, got {custom_router.num_samples}"
|
|
|
logger.success(
|
|
|
"✓ Custom parameters initialization test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Custom parameters initialization test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test 3: All agent types initialization
|
|
|
logger.info("Test 3: All agent types initialization")
|
|
|
agent_types = [
|
|
|
"reasoning-duo",
|
|
|
"reasoning-agent",
|
|
|
"self-consistency",
|
|
|
"consistency-agent",
|
|
|
"ire",
|
|
|
"ire-agent",
|
|
|
"ReflexionAgent",
|
|
|
"GKPAgent",
|
|
|
"AgentJudge",
|
|
|
]
|
|
|
|
|
|
for agent_type in agent_types:
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(swarm_type=agent_type)
|
|
|
assert (
|
|
|
router is not None
|
|
|
), f"Router for {agent_type} should not be None"
|
|
|
assert (
|
|
|
router.swarm_type == agent_type
|
|
|
), f"Expected {agent_type}, got {router.swarm_type}"
|
|
|
logger.info(f"✓ {agent_type} initialization successful")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ {agent_type} initialization failed: {e}")
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All router initialization tests passed")
|
|
|
|
|
|
|
|
|
def test_reliability_check():
|
|
|
"""
|
|
|
Test reliability_check method with various invalid configurations.
|
|
|
|
|
|
Tests:
|
|
|
- Zero max_loops
|
|
|
- Empty model_name
|
|
|
- Empty swarm_type
|
|
|
- None model_name
|
|
|
- None swarm_type
|
|
|
"""
|
|
|
logger.info("Starting reliability check tests...")
|
|
|
|
|
|
# Test 1: Zero max_loops
|
|
|
logger.info("Test 1: Zero max_loops should raise error")
|
|
|
try:
|
|
|
ReasoningAgentRouter(max_loops=0)
|
|
|
assert (
|
|
|
False
|
|
|
), "Should have raised ReasoningAgentInitializationError"
|
|
|
except ReasoningAgentInitializationError as e:
|
|
|
assert "Max loops must be greater than 0" in str(
|
|
|
e
|
|
|
), f"Expected max loops error, got: {e}"
|
|
|
logger.success("✓ Zero max_loops error handling test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Zero max_loops test failed with unexpected error: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test 2: Empty model_name
|
|
|
logger.info("Test 2: Empty model_name should raise error")
|
|
|
try:
|
|
|
ReasoningAgentRouter(model_name="")
|
|
|
assert (
|
|
|
False
|
|
|
), "Should have raised ReasoningAgentInitializationError"
|
|
|
except ReasoningAgentInitializationError as e:
|
|
|
assert "Model name must be provided" in str(
|
|
|
e
|
|
|
), f"Expected model name error, got: {e}"
|
|
|
logger.success(
|
|
|
"✓ Empty model_name error handling test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Empty model_name test failed with unexpected error: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test 3: None model_name
|
|
|
logger.info("Test 3: None model_name should raise error")
|
|
|
try:
|
|
|
ReasoningAgentRouter(model_name=None)
|
|
|
assert (
|
|
|
False
|
|
|
), "Should have raised ReasoningAgentInitializationError"
|
|
|
except ReasoningAgentInitializationError as e:
|
|
|
assert "Model name must be provided" in str(
|
|
|
e
|
|
|
), f"Expected model name error, got: {e}"
|
|
|
logger.success("✓ None model_name error handling test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ None model_name test failed with unexpected error: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test 4: Empty swarm_type
|
|
|
logger.info("Test 4: Empty swarm_type should raise error")
|
|
|
try:
|
|
|
ReasoningAgentRouter(swarm_type="")
|
|
|
assert (
|
|
|
False
|
|
|
), "Should have raised ReasoningAgentInitializationError"
|
|
|
except ReasoningAgentInitializationError as e:
|
|
|
assert "Swarm type must be provided" in str(
|
|
|
e
|
|
|
), f"Expected swarm type error, got: {e}"
|
|
|
logger.success(
|
|
|
"✓ Empty swarm_type error handling test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Empty swarm_type test failed with unexpected error: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test 5: None swarm_type
|
|
|
logger.info("Test 5: None swarm_type should raise error")
|
|
|
try:
|
|
|
ReasoningAgentRouter(swarm_type=None)
|
|
|
assert (
|
|
|
False
|
|
|
), "Should have raised ReasoningAgentInitializationError"
|
|
|
except ReasoningAgentInitializationError as e:
|
|
|
assert "Swarm type must be provided" in str(
|
|
|
e
|
|
|
), f"Expected swarm type error, got: {e}"
|
|
|
logger.success("✓ None swarm_type error handling test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ None swarm_type test failed with unexpected error: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All reliability check tests passed")
|
|
|
|
|
|
|
|
|
def test_agent_factories():
|
|
|
"""
|
|
|
Test all agent factory methods for each agent type.
|
|
|
|
|
|
Tests:
|
|
|
- _create_reasoning_duo
|
|
|
- _create_consistency_agent
|
|
|
- _create_ire_agent
|
|
|
- _create_agent_judge
|
|
|
- _create_reflexion_agent
|
|
|
- _create_gkp_agent
|
|
|
"""
|
|
|
logger.info("Starting agent factory tests...")
|
|
|
|
|
|
# Test configuration
|
|
|
test_config = {
|
|
|
"agent_name": "test_agent",
|
|
|
"description": "Test agent",
|
|
|
"model_name": "gpt-4o-mini",
|
|
|
"system_prompt": "Test prompt",
|
|
|
"max_loops": 2,
|
|
|
"num_samples": 3,
|
|
|
"output_type": "dict-all-except-first",
|
|
|
"num_knowledge_items": 5,
|
|
|
"memory_capacity": 10,
|
|
|
"eval": False,
|
|
|
"random_models_on": False,
|
|
|
"majority_voting_prompt": None,
|
|
|
"reasoning_model_name": "claude-3-5-sonnet-20240620",
|
|
|
}
|
|
|
|
|
|
# Test 1: Reasoning Duo factory
|
|
|
logger.info("Test 1: _create_reasoning_duo")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="reasoning-duo", **test_config
|
|
|
)
|
|
|
agent = router._create_reasoning_duo()
|
|
|
assert (
|
|
|
agent is not None
|
|
|
), "Reasoning duo agent should not be None"
|
|
|
logger.success("✓ _create_reasoning_duo test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ _create_reasoning_duo test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 2: Consistency Agent factory
|
|
|
logger.info("Test 2: _create_consistency_agent")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="self-consistency", **test_config
|
|
|
)
|
|
|
agent = router._create_consistency_agent()
|
|
|
assert (
|
|
|
agent is not None
|
|
|
), "Consistency agent should not be None"
|
|
|
logger.success("✓ _create_consistency_agent test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ _create_consistency_agent test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 3: IRE Agent factory
|
|
|
logger.info("Test 3: _create_ire_agent")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(swarm_type="ire", **test_config)
|
|
|
agent = router._create_ire_agent()
|
|
|
assert agent is not None, "IRE agent should not be None"
|
|
|
logger.success("✓ _create_ire_agent test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ _create_ire_agent test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 4: Agent Judge factory
|
|
|
logger.info("Test 4: _create_agent_judge")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="AgentJudge", **test_config
|
|
|
)
|
|
|
agent = router._create_agent_judge()
|
|
|
assert agent is not None, "Agent judge should not be None"
|
|
|
logger.success("✓ _create_agent_judge test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ _create_agent_judge test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 5: Reflexion Agent factory
|
|
|
logger.info("Test 5: _create_reflexion_agent")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="ReflexionAgent", **test_config
|
|
|
)
|
|
|
agent = router._create_reflexion_agent()
|
|
|
assert agent is not None, "Reflexion agent should not be None"
|
|
|
logger.success("✓ _create_reflexion_agent test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ _create_reflexion_agent test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 6: GKP Agent factory
|
|
|
logger.info("Test 6: _create_gkp_agent")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="GKPAgent", **test_config
|
|
|
)
|
|
|
agent = router._create_gkp_agent()
|
|
|
assert agent is not None, "GKP agent should not be None"
|
|
|
logger.success("✓ _create_gkp_agent test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ _create_gkp_agent test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All agent factory tests passed")
|
|
|
|
|
|
|
|
|
def test_select_swarm():
|
|
|
"""
|
|
|
Test select_swarm method for all supported agent types.
|
|
|
|
|
|
Tests:
|
|
|
- All valid agent types
|
|
|
- Invalid agent type
|
|
|
"""
|
|
|
logger.info("Starting select_swarm tests...")
|
|
|
|
|
|
agent_types = [
|
|
|
"reasoning-duo",
|
|
|
"reasoning-agent",
|
|
|
"self-consistency",
|
|
|
"consistency-agent",
|
|
|
"ire",
|
|
|
"ire-agent",
|
|
|
"ReflexionAgent",
|
|
|
"GKPAgent",
|
|
|
"AgentJudge",
|
|
|
]
|
|
|
|
|
|
# Test all valid agent types
|
|
|
for agent_type in agent_types:
|
|
|
logger.info(f"Test: select_swarm for {agent_type}")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(swarm_type=agent_type)
|
|
|
swarm = router.select_swarm()
|
|
|
assert (
|
|
|
swarm is not None
|
|
|
), f"Swarm for {agent_type} should not be None"
|
|
|
logger.success(
|
|
|
f"✓ select_swarm for {agent_type} test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ select_swarm for {agent_type} test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test invalid agent type
|
|
|
logger.info("Test: Invalid agent type should raise error")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(swarm_type="invalid_type")
|
|
|
swarm = router.select_swarm()
|
|
|
assert (
|
|
|
False
|
|
|
), "Should have raised ReasoningAgentInitializationError"
|
|
|
except ReasoningAgentInitializationError as e:
|
|
|
assert "Invalid swarm type" in str(
|
|
|
e
|
|
|
), f"Expected invalid swarm type error, got: {e}"
|
|
|
logger.success(
|
|
|
"✓ Invalid agent type error handling test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Invalid agent type test failed with unexpected error: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All select_swarm tests passed")
|
|
|
|
|
|
|
|
|
def test_run_method():
|
|
|
"""
|
|
|
Test run method with different agent types and tasks.
|
|
|
|
|
|
Tests:
|
|
|
- Method structure and signature
|
|
|
- Actual execution with mock tasks
|
|
|
- Return value validation (non-None)
|
|
|
- Error handling for invalid inputs
|
|
|
"""
|
|
|
logger.info("Starting run method tests...")
|
|
|
|
|
|
# Test configuration for different agent types
|
|
|
test_configs = [
|
|
|
{"swarm_type": "reasoning-duo", "max_loops": 1},
|
|
|
{"swarm_type": "self-consistency", "num_samples": 2},
|
|
|
{"swarm_type": "ire", "max_loops": 1},
|
|
|
{"swarm_type": "ReflexionAgent", "max_loops": 1},
|
|
|
{"swarm_type": "GKPAgent"},
|
|
|
{"swarm_type": "AgentJudge", "max_loops": 1},
|
|
|
]
|
|
|
|
|
|
test_tasks = [
|
|
|
"What is 2+2?",
|
|
|
"Explain the concept of recursion in programming.",
|
|
|
"List three benefits of renewable energy.",
|
|
|
]
|
|
|
|
|
|
for config in test_configs:
|
|
|
agent_type = config["swarm_type"]
|
|
|
logger.info(f"Test: run method for {agent_type}")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(**config)
|
|
|
|
|
|
# Test 1: Method structure
|
|
|
logger.info(f"Test 1: Method structure for {agent_type}")
|
|
|
assert hasattr(
|
|
|
router, "run"
|
|
|
), "Router should have run method"
|
|
|
assert callable(
|
|
|
router.run
|
|
|
), "run method should be callable"
|
|
|
|
|
|
# Test method signature
|
|
|
import inspect
|
|
|
|
|
|
sig = inspect.signature(router.run)
|
|
|
assert (
|
|
|
"task" in sig.parameters
|
|
|
), "run method should have 'task' parameter"
|
|
|
logger.success(
|
|
|
f"✓ Method structure for {agent_type} test passed"
|
|
|
)
|
|
|
|
|
|
# Test 2: Actual execution with mock tasks
|
|
|
logger.info(f"Test 2: Actual execution for {agent_type}")
|
|
|
for i, task in enumerate(test_tasks):
|
|
|
try:
|
|
|
# Note: This will fail without API keys, but we test the method call structure
|
|
|
# and catch the expected error to verify the method is working
|
|
|
result = router.run(task)
|
|
|
# If we get here (unlikely without API keys), verify result is not None
|
|
|
assert (
|
|
|
result is not None
|
|
|
), f"Result for task {i+1} should not be None"
|
|
|
logger.info(
|
|
|
f"✓ Task {i+1} execution successful for {agent_type}"
|
|
|
)
|
|
|
except Exception as run_error:
|
|
|
# Expected to fail without API keys, but verify it's a reasonable error
|
|
|
error_msg = str(run_error).lower()
|
|
|
if any(
|
|
|
keyword in error_msg
|
|
|
for keyword in [
|
|
|
"api",
|
|
|
"key",
|
|
|
"auth",
|
|
|
"token",
|
|
|
"openai",
|
|
|
"anthropic",
|
|
|
]
|
|
|
):
|
|
|
logger.info(
|
|
|
f"✓ Task {i+1} failed as expected (no API key) for {agent_type}"
|
|
|
)
|
|
|
else:
|
|
|
# If it's not an API key error, it might be a real issue
|
|
|
logger.warning(
|
|
|
f"Task {i+1} failed with unexpected error for {agent_type}: {run_error}"
|
|
|
)
|
|
|
|
|
|
# Test 3: Error handling for invalid inputs
|
|
|
logger.info(f"Test 3: Error handling for {agent_type}")
|
|
|
try:
|
|
|
# Test with empty task
|
|
|
result = router.run("")
|
|
|
# If we get here, the method should handle empty strings gracefully
|
|
|
logger.info(f"✓ Empty task handling for {agent_type}")
|
|
|
except Exception:
|
|
|
# This is also acceptable - empty task might be rejected
|
|
|
logger.info(
|
|
|
f"✓ Empty task properly rejected for {agent_type}"
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
# Test with None task
|
|
|
result = router.run(None)
|
|
|
# If we get here, the method should handle None gracefully
|
|
|
logger.info(f"✓ None task handling for {agent_type}")
|
|
|
except Exception:
|
|
|
# This is also acceptable - None task might be rejected
|
|
|
logger.info(
|
|
|
f"✓ None task properly rejected for {agent_type}"
|
|
|
)
|
|
|
|
|
|
logger.success(
|
|
|
f"✓ All run method tests for {agent_type} passed"
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ run method for {agent_type} test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All run method tests passed")
|
|
|
|
|
|
|
|
|
def test_batched_run_method():
|
|
|
"""
|
|
|
Test batched_run method with multiple tasks.
|
|
|
|
|
|
Tests:
|
|
|
- Method existence and callability
|
|
|
- Parameter validation
|
|
|
- Actual execution with multiple tasks
|
|
|
- Return value validation (list of non-None results)
|
|
|
"""
|
|
|
logger.info("Starting batched_run method tests...")
|
|
|
|
|
|
# Test configuration
|
|
|
router = ReasoningAgentRouter(swarm_type="reasoning-duo")
|
|
|
|
|
|
# Test 1: Method existence and callability
|
|
|
logger.info("Test 1: Method existence and callability")
|
|
|
try:
|
|
|
assert hasattr(
|
|
|
router, "batched_run"
|
|
|
), "Router should have batched_run method"
|
|
|
assert callable(
|
|
|
router.batched_run
|
|
|
), "batched_run method should be callable"
|
|
|
logger.success(
|
|
|
"✓ Method existence and callability test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ Method existence test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 2: Parameter validation
|
|
|
logger.info("Test 2: Parameter validation")
|
|
|
try:
|
|
|
import inspect
|
|
|
|
|
|
sig = inspect.signature(router.batched_run)
|
|
|
assert (
|
|
|
"tasks" in sig.parameters
|
|
|
), "batched_run method should have 'tasks' parameter"
|
|
|
logger.success("✓ Parameter validation test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ Parameter validation test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 3: Actual execution with multiple tasks
|
|
|
logger.info("Test 3: Actual execution with multiple tasks")
|
|
|
test_tasks = [
|
|
|
"What is 2+2?",
|
|
|
"What is the capital of France?",
|
|
|
"Explain photosynthesis briefly.",
|
|
|
]
|
|
|
|
|
|
try:
|
|
|
# This will likely fail without API keys, but we test the method call structure
|
|
|
results = router.batched_run(test_tasks)
|
|
|
|
|
|
# If we get here (unlikely without API keys), verify results
|
|
|
assert isinstance(
|
|
|
results, list
|
|
|
), "batched_run should return a list"
|
|
|
assert len(results) == len(
|
|
|
test_tasks
|
|
|
), f"Expected {len(test_tasks)} results, got {len(results)}"
|
|
|
|
|
|
for i, result in enumerate(results):
|
|
|
assert (
|
|
|
result is not None
|
|
|
), f"Result {i+1} should not be None"
|
|
|
logger.info(f"✓ Task {i+1} result validation passed")
|
|
|
|
|
|
logger.success("✓ Actual execution test passed")
|
|
|
|
|
|
except Exception as run_error:
|
|
|
# Expected to fail without API keys, but verify it's a reasonable error
|
|
|
error_msg = str(run_error).lower()
|
|
|
if any(
|
|
|
keyword in error_msg
|
|
|
for keyword in [
|
|
|
"api",
|
|
|
"key",
|
|
|
"auth",
|
|
|
"token",
|
|
|
"openai",
|
|
|
"anthropic",
|
|
|
]
|
|
|
):
|
|
|
logger.info(
|
|
|
"✓ Batched execution failed as expected (no API key)"
|
|
|
)
|
|
|
else:
|
|
|
# If it's not an API key error, it might be a real issue
|
|
|
logger.warning(
|
|
|
f"Batched execution failed with unexpected error: {run_error}"
|
|
|
)
|
|
|
|
|
|
# Test 4: Error handling for invalid inputs
|
|
|
logger.info("Test 4: Error handling for invalid inputs")
|
|
|
|
|
|
# Test with empty task list
|
|
|
try:
|
|
|
results = router.batched_run([])
|
|
|
assert isinstance(
|
|
|
results, list
|
|
|
), "Should return empty list for empty input"
|
|
|
assert (
|
|
|
len(results) == 0
|
|
|
), "Empty input should return empty results"
|
|
|
logger.info("✓ Empty task list handling")
|
|
|
except Exception as empty_error:
|
|
|
logger.info(
|
|
|
f"✓ Empty task list properly handled: {empty_error}"
|
|
|
)
|
|
|
|
|
|
# Test with None tasks
|
|
|
try:
|
|
|
results = router.batched_run(None)
|
|
|
logger.info("✓ None tasks handling")
|
|
|
except Exception as none_error:
|
|
|
logger.info(f"✓ None tasks properly rejected: {none_error}")
|
|
|
|
|
|
logger.success("✓ All batched_run method tests passed")
|
|
|
|
|
|
|
|
|
def test_error_handling():
|
|
|
"""
|
|
|
Test error handling for various error conditions.
|
|
|
|
|
|
Tests:
|
|
|
- Initialization errors
|
|
|
- Execution errors
|
|
|
- Invalid configurations
|
|
|
"""
|
|
|
logger.info("Starting error handling tests...")
|
|
|
|
|
|
# Test 1: Invalid swarm type in select_swarm
|
|
|
logger.info("Test 1: Invalid swarm type error handling")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(swarm_type="invalid_type")
|
|
|
router.select_swarm()
|
|
|
assert (
|
|
|
False
|
|
|
), "Should have raised ReasoningAgentInitializationError"
|
|
|
except ReasoningAgentInitializationError:
|
|
|
logger.success(
|
|
|
"✓ Invalid swarm type error handling test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Invalid swarm type error handling test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test 2: Agent factory error handling
|
|
|
logger.info("Test 2: Agent factory error handling")
|
|
|
try:
|
|
|
# Create router with valid type but test error handling in factory
|
|
|
router = ReasoningAgentRouter(swarm_type="reasoning-duo")
|
|
|
# This should work without errors
|
|
|
agent = router._create_reasoning_duo()
|
|
|
assert (
|
|
|
agent is not None
|
|
|
), "Agent should be created successfully"
|
|
|
logger.success("✓ Agent factory error handling test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Agent factory error handling test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All error handling tests passed")
|
|
|
|
|
|
|
|
|
def test_output_types():
|
|
|
"""
|
|
|
Test different output types configuration.
|
|
|
|
|
|
Tests:
|
|
|
- Various OutputType configurations
|
|
|
- Output type validation
|
|
|
"""
|
|
|
logger.info("Starting output types tests...")
|
|
|
|
|
|
output_types = ["dict-all-except-first", "dict", "string", "list"]
|
|
|
|
|
|
for output_type in output_types:
|
|
|
logger.info(f"Test: Output type {output_type}")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="reasoning-duo", output_type=output_type
|
|
|
)
|
|
|
assert (
|
|
|
router.output_type == output_type
|
|
|
), f"Expected {output_type}, got {router.output_type}"
|
|
|
logger.success(f"✓ Output type {output_type} test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Output type {output_type} test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All output types tests passed")
|
|
|
|
|
|
|
|
|
def test_agent_configurations():
|
|
|
"""
|
|
|
Test various agent-specific configurations.
|
|
|
|
|
|
Tests:
|
|
|
- Different num_samples values
|
|
|
- Different max_loops values
|
|
|
- Different memory_capacity values
|
|
|
- Different num_knowledge_items values
|
|
|
"""
|
|
|
logger.info("Starting agent configurations tests...")
|
|
|
|
|
|
# Test 1: num_samples configuration
|
|
|
logger.info("Test 1: num_samples configuration")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="self-consistency", num_samples=5
|
|
|
)
|
|
|
assert (
|
|
|
router.num_samples == 5
|
|
|
), f"Expected 5, got {router.num_samples}"
|
|
|
logger.success("✓ num_samples configuration test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ num_samples configuration test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 2: max_loops configuration
|
|
|
logger.info("Test 2: max_loops configuration")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="reasoning-duo", max_loops=10
|
|
|
)
|
|
|
assert (
|
|
|
router.max_loops == 10
|
|
|
), f"Expected 10, got {router.max_loops}"
|
|
|
logger.success("✓ max_loops configuration test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ max_loops configuration test failed: {e}")
|
|
|
raise
|
|
|
|
|
|
# Test 3: memory_capacity configuration
|
|
|
logger.info("Test 3: memory_capacity configuration")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="ReflexionAgent", memory_capacity=50
|
|
|
)
|
|
|
assert (
|
|
|
router.memory_capacity == 50
|
|
|
), f"Expected 50, got {router.memory_capacity}"
|
|
|
logger.success("✓ memory_capacity configuration test passed")
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ memory_capacity configuration test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
# Test 4: num_knowledge_items configuration
|
|
|
logger.info("Test 4: num_knowledge_items configuration")
|
|
|
try:
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type="GKPAgent", num_knowledge_items=15
|
|
|
)
|
|
|
assert (
|
|
|
router.num_knowledge_items == 15
|
|
|
), f"Expected 15, got {router.num_knowledge_items}"
|
|
|
logger.success(
|
|
|
"✓ num_knowledge_items configuration test passed"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ num_knowledge_items configuration test failed: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
logger.success("✓ All agent configurations tests passed")
|
|
|
|
|
|
|
|
|
def test_run_method_execution():
|
|
|
"""
|
|
|
Comprehensive test for the run method - the core functionality of ReasoningAgentRouter.
|
|
|
|
|
|
This test focuses specifically on testing the run(self, task) method with:
|
|
|
- Actual method execution
|
|
|
- Return value validation (non-None)
|
|
|
- Different agent types
|
|
|
- Various task types
|
|
|
- Error handling
|
|
|
- Method signature validation
|
|
|
"""
|
|
|
logger.info(
|
|
|
"Starting comprehensive run method execution tests..."
|
|
|
)
|
|
|
|
|
|
# Test all supported agent types
|
|
|
agent_types = [
|
|
|
"reasoning-duo",
|
|
|
"reasoning-agent",
|
|
|
"self-consistency",
|
|
|
"consistency-agent",
|
|
|
"ire",
|
|
|
"ire-agent",
|
|
|
"ReflexionAgent",
|
|
|
"GKPAgent",
|
|
|
"AgentJudge",
|
|
|
]
|
|
|
|
|
|
# Test tasks of different types and complexities
|
|
|
test_tasks = [
|
|
|
"What is 2+2?",
|
|
|
"Explain photosynthesis in one sentence.",
|
|
|
"List three benefits of renewable energy.",
|
|
|
"What is the capital of France?",
|
|
|
"Solve: 15 * 8 = ?",
|
|
|
"Define artificial intelligence briefly.",
|
|
|
]
|
|
|
|
|
|
for agent_type in agent_types:
|
|
|
logger.info(f"\n{'='*50}")
|
|
|
logger.info(f"Testing run method for: {agent_type}")
|
|
|
logger.info(f"{'='*50}")
|
|
|
|
|
|
try:
|
|
|
# Create router with appropriate configuration
|
|
|
router = ReasoningAgentRouter(
|
|
|
swarm_type=agent_type,
|
|
|
max_loops=1,
|
|
|
num_samples=(
|
|
|
2
|
|
|
if agent_type
|
|
|
in ["self-consistency", "consistency-agent"]
|
|
|
else 1
|
|
|
),
|
|
|
)
|
|
|
|
|
|
# Test 1: Method existence and callability
|
|
|
logger.info(
|
|
|
f"Test 1: Method existence and callability for {agent_type}"
|
|
|
)
|
|
|
assert hasattr(
|
|
|
router, "run"
|
|
|
), f"Router should have run method for {agent_type}"
|
|
|
assert callable(
|
|
|
router.run
|
|
|
), f"run method should be callable for {agent_type}"
|
|
|
logger.success(
|
|
|
f"✓ Method exists and is callable for {agent_type}"
|
|
|
)
|
|
|
|
|
|
# Test 2: Method signature validation
|
|
|
logger.info(
|
|
|
f"Test 2: Method signature validation for {agent_type}"
|
|
|
)
|
|
|
import inspect
|
|
|
|
|
|
sig = inspect.signature(router.run)
|
|
|
params = list(sig.parameters.keys())
|
|
|
assert (
|
|
|
"task" in params
|
|
|
), f"run method should have 'task' parameter for {agent_type}"
|
|
|
assert (
|
|
|
len(params) >= 1
|
|
|
), f"run method should have at least one parameter for {agent_type}"
|
|
|
logger.success(
|
|
|
f"✓ Method signature valid for {agent_type}: {params}"
|
|
|
)
|
|
|
|
|
|
# Test 3: Actual execution with multiple tasks
|
|
|
logger.info(
|
|
|
f"Test 3: Actual execution with multiple tasks for {agent_type}"
|
|
|
)
|
|
|
successful_executions = 0
|
|
|
total_executions = 0
|
|
|
|
|
|
for i, task in enumerate(test_tasks):
|
|
|
total_executions += 1
|
|
|
logger.info(
|
|
|
f" Executing task {i+1}/{len(test_tasks)}: '{task[:50]}{'...' if len(task) > 50 else ''}'"
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
# Execute the run method
|
|
|
result = router.run(task)
|
|
|
|
|
|
# Validate the result
|
|
|
if result is not None:
|
|
|
assert (
|
|
|
result is not None
|
|
|
), f"Result should not be None for task {i+1} with {agent_type}"
|
|
|
logger.success(
|
|
|
f" ✓ Task {i+1} executed successfully - Result type: {type(result)}"
|
|
|
)
|
|
|
successful_executions += 1
|
|
|
|
|
|
# Additional validation based on result type
|
|
|
if isinstance(result, str):
|
|
|
assert (
|
|
|
len(result) > 0
|
|
|
), f"String result should not be empty for task {i+1}"
|
|
|
logger.info(
|
|
|
f" ✓ String result length: {len(result)} characters"
|
|
|
)
|
|
|
elif isinstance(result, dict):
|
|
|
assert (
|
|
|
len(result) > 0
|
|
|
), f"Dict result should not be empty for task {i+1}"
|
|
|
logger.info(
|
|
|
f" ✓ Dict result keys: {list(result.keys())}"
|
|
|
)
|
|
|
elif isinstance(result, list):
|
|
|
logger.info(
|
|
|
f" ✓ List result length: {len(result)}"
|
|
|
)
|
|
|
else:
|
|
|
logger.info(
|
|
|
f" ✓ Result type: {type(result)}"
|
|
|
)
|
|
|
else:
|
|
|
logger.warning(
|
|
|
f" ⚠ Task {i+1} returned None (might be expected without API keys)"
|
|
|
)
|
|
|
|
|
|
except Exception as exec_error:
|
|
|
# Analyze the error to determine if it's expected
|
|
|
error_msg = str(exec_error).lower()
|
|
|
expected_keywords = [
|
|
|
"api",
|
|
|
"key",
|
|
|
"auth",
|
|
|
"token",
|
|
|
"openai",
|
|
|
"anthropic",
|
|
|
"rate",
|
|
|
"limit",
|
|
|
"quota",
|
|
|
"billing",
|
|
|
]
|
|
|
|
|
|
if any(
|
|
|
keyword in error_msg
|
|
|
for keyword in expected_keywords
|
|
|
):
|
|
|
logger.info(
|
|
|
f" ✓ Task {i+1} failed as expected (no API key) for {agent_type}"
|
|
|
)
|
|
|
else:
|
|
|
# Log unexpected errors for investigation
|
|
|
logger.warning(
|
|
|
f" ⚠ Task {i+1} failed with unexpected error for {agent_type}: {exec_error}"
|
|
|
)
|
|
|
|
|
|
# Test 4: Execution statistics
|
|
|
logger.info(
|
|
|
f"Test 4: Execution statistics for {agent_type}"
|
|
|
)
|
|
|
success_rate = (
|
|
|
(successful_executions / total_executions) * 100
|
|
|
if total_executions > 0
|
|
|
else 0
|
|
|
)
|
|
|
logger.info(
|
|
|
f" Execution success rate: {success_rate:.1f}% ({successful_executions}/{total_executions})"
|
|
|
)
|
|
|
|
|
|
if successful_executions > 0:
|
|
|
logger.success(
|
|
|
f"✓ {successful_executions} tasks executed successfully for {agent_type}"
|
|
|
)
|
|
|
else:
|
|
|
logger.info(
|
|
|
f"ℹ No tasks executed successfully for {agent_type} (expected without API keys)"
|
|
|
)
|
|
|
|
|
|
# Test 5: Error handling for edge cases
|
|
|
logger.info(
|
|
|
f"Test 5: Error handling for edge cases with {agent_type}"
|
|
|
)
|
|
|
|
|
|
# Test with empty string
|
|
|
try:
|
|
|
result = router.run("")
|
|
|
if result is not None:
|
|
|
logger.info(
|
|
|
f" ✓ Empty string handled gracefully for {agent_type}"
|
|
|
)
|
|
|
else:
|
|
|
logger.info(
|
|
|
f" ✓ Empty string returned None (acceptable) for {agent_type}"
|
|
|
)
|
|
|
except Exception:
|
|
|
logger.info(
|
|
|
f" ✓ Empty string properly rejected for {agent_type}"
|
|
|
)
|
|
|
|
|
|
# Test with None
|
|
|
try:
|
|
|
result = router.run(None)
|
|
|
if result is not None:
|
|
|
logger.info(
|
|
|
f" ✓ None handled gracefully for {agent_type}"
|
|
|
)
|
|
|
else:
|
|
|
logger.info(
|
|
|
f" ✓ None returned None (acceptable) for {agent_type}"
|
|
|
)
|
|
|
except Exception:
|
|
|
logger.info(
|
|
|
f" ✓ None properly rejected for {agent_type}"
|
|
|
)
|
|
|
|
|
|
# Test with very long task
|
|
|
long_task = "Explain " + "artificial intelligence " * 100
|
|
|
try:
|
|
|
result = router.run(long_task)
|
|
|
if result is not None:
|
|
|
logger.info(
|
|
|
f" ✓ Long task handled for {agent_type}"
|
|
|
)
|
|
|
else:
|
|
|
logger.info(
|
|
|
f" ✓ Long task returned None (acceptable) for {agent_type}"
|
|
|
)
|
|
|
except Exception:
|
|
|
logger.info(
|
|
|
f" ✓ Long task properly handled for {agent_type}"
|
|
|
)
|
|
|
|
|
|
logger.success(
|
|
|
f"✓ All run method tests completed for {agent_type}"
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ Run method test failed for {agent_type}: {e}"
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
logger.success(
|
|
|
"✓ All comprehensive run method execution tests passed"
|
|
|
)
|
|
|
|
|
|
|
|
|
def test_run_method_core_functionality():
|
|
|
"""
|
|
|
Core functionality test for the run method - the most important test.
|
|
|
|
|
|
This test specifically focuses on:
|
|
|
1. Testing run(self, task) with actual execution
|
|
|
2. Validating that results are not None
|
|
|
3. Testing all agent types
|
|
|
4. Comprehensive error handling
|
|
|
5. Return value type validation
|
|
|
"""
|
|
|
logger.info("Starting CORE run method functionality tests...")
|
|
|
logger.info(
|
|
|
"This is the most important test - validating run(self, task) execution"
|
|
|
)
|
|
|
|
|
|
# Test configurations for different agent types
|
|
|
test_configs = [
|
|
|
{
|
|
|
"swarm_type": "reasoning-duo",
|
|
|
"max_loops": 1,
|
|
|
"description": "Dual agent collaboration",
|
|
|
},
|
|
|
{
|
|
|
"swarm_type": "self-consistency",
|
|
|
"num_samples": 3,
|
|
|
"description": "Multiple independent solutions",
|
|
|
},
|
|
|
{
|
|
|
"swarm_type": "ire",
|
|
|
"max_loops": 1,
|
|
|
"description": "Iterative reflective expansion",
|
|
|
},
|
|
|
{
|
|
|
"swarm_type": "ReflexionAgent",
|
|
|
"max_loops": 1,
|
|
|
"description": "Self-reflection agent",
|
|
|
},
|
|
|
{
|
|
|
"swarm_type": "GKPAgent",
|
|
|
"description": "Generated knowledge prompting",
|
|
|
},
|
|
|
{
|
|
|
"swarm_type": "AgentJudge",
|
|
|
"max_loops": 1,
|
|
|
"description": "Agent evaluation",
|
|
|
},
|
|
|
]
|
|
|
|
|
|
# Core test tasks
|
|
|
core_tasks = [
|
|
|
"What is 2+2?",
|
|
|
"Explain the water cycle in one sentence.",
|
|
|
"What is the capital of Japan?",
|
|
|
"List two benefits of exercise.",
|
|
|
"Solve: 12 * 7 = ?",
|
|
|
]
|
|
|
|
|
|
total_tests = 0
|
|
|
successful_tests = 0
|
|
|
failed_tests = 0
|
|
|
|
|
|
for config in test_configs:
|
|
|
agent_type = config["swarm_type"]
|
|
|
description = config["description"]
|
|
|
|
|
|
logger.info(f"\n{'='*60}")
|
|
|
logger.info(f"Testing {agent_type} - {description}")
|
|
|
logger.info(f"{'='*60}")
|
|
|
|
|
|
try:
|
|
|
# Create router
|
|
|
router = ReasoningAgentRouter(**config)
|
|
|
|
|
|
# Test each core task
|
|
|
for i, task in enumerate(core_tasks):
|
|
|
total_tests += 1
|
|
|
logger.info(
|
|
|
f"\nTask {i+1}/{len(core_tasks)}: '{task}'"
|
|
|
)
|
|
|
logger.info(f"Agent: {agent_type}")
|
|
|
|
|
|
try:
|
|
|
# Execute the run method - THIS IS THE CORE TEST
|
|
|
result = router.run(task)
|
|
|
|
|
|
# CRITICAL VALIDATION: Result must not be None
|
|
|
if result is not None:
|
|
|
successful_tests += 1
|
|
|
logger.success(
|
|
|
"✓ SUCCESS: Task executed and returned non-None result"
|
|
|
)
|
|
|
logger.info(f" Result type: {type(result)}")
|
|
|
|
|
|
# Validate result content based on type
|
|
|
if isinstance(result, str):
|
|
|
assert (
|
|
|
len(result) > 0
|
|
|
), "String result should not be empty"
|
|
|
logger.info(
|
|
|
f" String length: {len(result)} characters"
|
|
|
)
|
|
|
logger.info(
|
|
|
f" First 100 chars: {result[:100]}{'...' if len(result) > 100 else ''}"
|
|
|
)
|
|
|
elif isinstance(result, dict):
|
|
|
assert (
|
|
|
len(result) > 0
|
|
|
), "Dict result should not be empty"
|
|
|
logger.info(
|
|
|
f" Dict keys: {list(result.keys())}"
|
|
|
)
|
|
|
logger.info(
|
|
|
f" Dict size: {len(result)} items"
|
|
|
)
|
|
|
elif isinstance(result, list):
|
|
|
logger.info(
|
|
|
f" List length: {len(result)} items"
|
|
|
)
|
|
|
else:
|
|
|
logger.info(
|
|
|
f" Result value: {str(result)[:100]}{'...' if len(str(result)) > 100 else ''}"
|
|
|
)
|
|
|
|
|
|
# Additional validation: result should be meaningful
|
|
|
if (
|
|
|
isinstance(result, str)
|
|
|
and len(result.strip()) == 0
|
|
|
):
|
|
|
logger.warning(
|
|
|
" ⚠ Result is empty string"
|
|
|
)
|
|
|
elif (
|
|
|
isinstance(result, dict)
|
|
|
and len(result) == 0
|
|
|
):
|
|
|
logger.warning(
|
|
|
" ⚠ Result is empty dictionary"
|
|
|
)
|
|
|
elif (
|
|
|
isinstance(result, list)
|
|
|
and len(result) == 0
|
|
|
):
|
|
|
logger.warning(" ⚠ Result is empty list")
|
|
|
else:
|
|
|
logger.success(
|
|
|
" ✓ Result appears to be meaningful content"
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
failed_tests += 1
|
|
|
logger.error(
|
|
|
"✗ FAILURE: Task returned None result"
|
|
|
)
|
|
|
logger.error(
|
|
|
" This indicates the run method is not working properly"
|
|
|
)
|
|
|
|
|
|
except Exception as exec_error:
|
|
|
failed_tests += 1
|
|
|
error_msg = str(exec_error)
|
|
|
logger.error(
|
|
|
"✗ FAILURE: Task execution failed with error"
|
|
|
)
|
|
|
logger.error(f" Error: {error_msg}")
|
|
|
|
|
|
# Check if it's an expected API key error
|
|
|
if any(
|
|
|
keyword in error_msg.lower()
|
|
|
for keyword in [
|
|
|
"api",
|
|
|
"key",
|
|
|
"auth",
|
|
|
"token",
|
|
|
"openai",
|
|
|
"anthropic",
|
|
|
]
|
|
|
):
|
|
|
logger.info(
|
|
|
" ℹ This appears to be an API key error (expected without credentials)"
|
|
|
)
|
|
|
else:
|
|
|
logger.warning(
|
|
|
" ⚠ This might be an unexpected error that needs investigation"
|
|
|
)
|
|
|
|
|
|
logger.info(f"\n{agent_type} Summary:")
|
|
|
logger.info(f" Total tasks tested: {len(core_tasks)}")
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(
|
|
|
f"✗ FAILURE: Router creation failed for {agent_type}: {e}"
|
|
|
)
|
|
|
failed_tests += len(core_tasks)
|
|
|
total_tests += len(core_tasks)
|
|
|
|
|
|
# Final summary
|
|
|
logger.info(f"\n{'='*60}")
|
|
|
logger.info("CORE RUN METHOD TEST SUMMARY")
|
|
|
logger.info(f"{'='*60}")
|
|
|
logger.info(f"Total tests executed: {total_tests}")
|
|
|
logger.info(f"Successful executions: {successful_tests}")
|
|
|
logger.info(f"Failed executions: {failed_tests}")
|
|
|
|
|
|
if total_tests > 0:
|
|
|
success_rate = (successful_tests / total_tests) * 100
|
|
|
logger.info(f"Success rate: {success_rate:.1f}%")
|
|
|
|
|
|
if success_rate >= 50:
|
|
|
logger.success(
|
|
|
f"✓ CORE TEST PASSED: {success_rate:.1f}% success rate is acceptable"
|
|
|
)
|
|
|
elif success_rate > 0:
|
|
|
logger.warning(
|
|
|
f"⚠ CORE TEST PARTIAL: {success_rate:.1f}% success rate - some functionality working"
|
|
|
)
|
|
|
else:
|
|
|
logger.error(
|
|
|
"✗ CORE TEST FAILED: 0% success rate - run method not working"
|
|
|
)
|
|
|
else:
|
|
|
logger.error("✗ CORE TEST FAILED: No tests were executed")
|
|
|
|
|
|
logger.info(f"{'='*60}")
|
|
|
|
|
|
# The test passes if we have some successful executions or if failures are due to API key issues
|
|
|
if successful_tests > 0:
|
|
|
logger.success("✓ Core run method functionality test PASSED")
|
|
|
return True
|
|
|
else:
|
|
|
logger.error("✗ Core run method functionality test FAILED")
|
|
|
return False
|
|
|
|
|
|
|
|
|
def run_all_tests():
|
|
|
"""
|
|
|
Run all unit tests for ReasoningAgentRouter.
|
|
|
|
|
|
This function executes all test functions and provides a summary.
|
|
|
"""
|
|
|
logger.info("=" * 60)
|
|
|
logger.info("Starting ReasoningAgentRouter Unit Tests")
|
|
|
logger.info("=" * 60)
|
|
|
|
|
|
test_functions = [
|
|
|
test_run_method_core_functionality, # Most important test - run method execution
|
|
|
test_run_method_execution, # Comprehensive run method tests
|
|
|
test_run_method, # Basic run method structure tests
|
|
|
test_router_initialization,
|
|
|
test_reliability_check,
|
|
|
test_agent_factories,
|
|
|
test_select_swarm,
|
|
|
test_batched_run_method,
|
|
|
test_error_handling,
|
|
|
test_output_types,
|
|
|
test_agent_configurations,
|
|
|
]
|
|
|
|
|
|
passed_tests = 0
|
|
|
total_tests = len(test_functions)
|
|
|
|
|
|
for test_func in test_functions:
|
|
|
try:
|
|
|
logger.info(f"\nRunning {test_func.__name__}...")
|
|
|
test_func()
|
|
|
passed_tests += 1
|
|
|
logger.success(
|
|
|
f"✓ {test_func.__name__} completed successfully"
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ {test_func.__name__} failed: {e}")
|
|
|
raise
|
|
|
|
|
|
logger.info("\n" + "=" * 60)
|
|
|
logger.info(
|
|
|
f"Test Summary: {passed_tests}/{total_tests} tests passed"
|
|
|
)
|
|
|
logger.info("=" * 60)
|
|
|
|
|
|
if passed_tests == total_tests:
|
|
|
logger.success("🎉 All tests passed successfully!")
|
|
|
return True
|
|
|
else:
|
|
|
logger.error(f"❌ {total_tests - passed_tests} tests failed")
|
|
|
return False
|
|
|
|
|
|
|
|
|
def run_core_tests_only():
|
|
|
"""
|
|
|
Run only the core run method tests - the most important functionality.
|
|
|
|
|
|
This function focuses specifically on testing the run(self, task) method
|
|
|
which is the core functionality of ReasoningAgentRouter.
|
|
|
"""
|
|
|
logger.info("=" * 60)
|
|
|
logger.info("Running CORE RUN METHOD TESTS ONLY")
|
|
|
logger.info("=" * 60)
|
|
|
|
|
|
core_test_functions = [
|
|
|
test_run_method_core_functionality, # Most important test
|
|
|
test_run_method_execution, # Comprehensive run method tests
|
|
|
test_run_method, # Basic run method structure tests
|
|
|
]
|
|
|
|
|
|
passed_tests = 0
|
|
|
total_tests = len(core_test_functions)
|
|
|
|
|
|
for test_func in core_test_functions:
|
|
|
try:
|
|
|
logger.info(f"\nRunning {test_func.__name__}...")
|
|
|
result = test_func()
|
|
|
if result is not False: # Allow True or None
|
|
|
passed_tests += 1
|
|
|
logger.success(
|
|
|
f"✓ {test_func.__name__} completed successfully"
|
|
|
)
|
|
|
else:
|
|
|
logger.error(f"✗ {test_func.__name__} failed")
|
|
|
except Exception as e:
|
|
|
logger.error(f"✗ {test_func.__name__} failed: {e}")
|
|
|
|
|
|
logger.info("\n" + "=" * 60)
|
|
|
logger.info(
|
|
|
f"CORE TEST SUMMARY: {passed_tests}/{total_tests} tests passed"
|
|
|
)
|
|
|
logger.info("=" * 60)
|
|
|
|
|
|
if passed_tests == total_tests:
|
|
|
logger.success(
|
|
|
"🎉 All core run method tests passed successfully!"
|
|
|
)
|
|
|
return True
|
|
|
else:
|
|
|
logger.error(
|
|
|
f"❌ {total_tests - passed_tests} core tests failed"
|
|
|
)
|
|
|
return False
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
"""
|
|
|
Main execution block for running the unit tests.
|
|
|
|
|
|
This block runs all tests when the script is executed directly.
|
|
|
Use run_core_tests_only() for focused testing of the run method.
|
|
|
"""
|
|
|
import sys
|
|
|
|
|
|
try:
|
|
|
success = run_all_tests()
|
|
|
if success:
|
|
|
logger.info(
|
|
|
"All ReasoningAgentRouter unit tests completed successfully!"
|
|
|
)
|
|
|
sys.exit(0)
|
|
|
else:
|
|
|
logger.error("Some tests failed!")
|
|
|
sys.exit(1)
|
|
|
except Exception as e:
|
|
|
logger.error(f"Test execution failed with error: {e}")
|
|
|
sys.exit(1)
|