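"""Comprehensive test suite for the swarms `Agent` class.

Covers basic agent behavior, advanced agent features, step logging,
YAML-based agent creation, initialization benchmarks, and tool usage.

Run with: pytest -v <this file>
"""
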
import asyncio
import json
import os
import tempfile
import time
import unittest
from statistics import mean, median, stdev, variance
from unittest.mock import MagicMock, patch

import psutil
import pytest
import yaml
from dotenv import load_dotenv
from rich.console import Console
from rich.table import Table

from swarms import (
    Agent,
    create_agents_from_yaml,
)

# Load environment variables
load_dotenv()

# Global test configuration
openai_api_key = os.getenv("OPENAI_API_KEY")


# ============================================================================
# FIXTURES AND UTILITIES
# ============================================================================


@pytest.fixture
def basic_flow(mocked_llm):
    """Basic agent flow for testing"""
    return Agent(llm=mocked_llm, max_loops=1)


@pytest.fixture
def flow_with_condition(mocked_llm):
    """Agent flow with stopping condition"""
    from swarms.structs.agent import stop_when_repeats

    return Agent(
        llm=mocked_llm,
        max_loops=1,
        stopping_condition=stop_when_repeats,
    )


@pytest.fixture
def mock_agents():
    """Mock agents for testing"""

    class MockAgent:
        def __init__(self, name):
            self.name = name
            self.agent_name = name

        def run(self, task, img=None, *args, **kwargs):
            return f"{self.name} processed {task}"

    return [
        MockAgent(name="Agent1"),
        MockAgent(name="Agent2"),
        MockAgent(name="Agent3"),
    ]


@pytest.fixture
def test_agent():
    """Create a real agent for testing"""
    with patch("swarms.structs.agent.LiteLLM") as mock_llm:
        mock_llm.return_value.run.return_value = "Test response"
        return Agent(
            agent_name="test_agent",
            agent_description="A test agent",
            system_prompt="You are a test agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            verbose=False,
            print_on=False,
        )


# ============================================================================
# BASIC AGENT TESTS
# ============================================================================


class TestBasicAgent:
    """Test basic agent functionality"""

    def test_stop_when_repeats(self):
        """Test stopping condition function"""
        from swarms.structs.agent import stop_when_repeats

        assert stop_when_repeats("Please Stop now")
        assert not stop_when_repeats("Continue the process")

    def test_flow_initialization(self, basic_flow):
        """Test agent initialization"""
        assert basic_flow.max_loops == 1
        assert basic_flow.stopping_condition is None
        assert basic_flow.loop_interval == 1
        assert basic_flow.retry_attempts == 3
        assert basic_flow.retry_interval == 1
        assert basic_flow.feedback == []
        assert basic_flow.memory == []
        assert basic_flow.task is None
        assert basic_flow.stopping_token == "<DONE>"
        assert not basic_flow.interactive

    def test_provide_feedback(self, basic_flow):
        """Test feedback functionality"""
        feedback = "Test feedback"
        basic_flow.provide_feedback(feedback)
        assert feedback in basic_flow.feedback

    @patch("time.sleep", return_value=None)
    def test_run_without_stopping_condition(self, mocked_sleep, basic_flow):
        """Test running without stopping condition"""
        response = basic_flow.run("Test task")
        assert response is not None

    @patch("time.sleep", return_value=None)
    def test_run_with_stopping_condition(self, mocked_sleep, flow_with_condition):
        """Test running with stopping condition"""
        response = flow_with_condition.run("Stop")
        assert response is not None

    def test_bulk_run(self, basic_flow):
        """Test bulk run functionality"""
        inputs = [{"task": "Test1"}, {"task": "Test2"}]
        responses = basic_flow.bulk_run(inputs)
        assert responses is not None

    def test_save_and_load(self, basic_flow, tmp_path):
        """Test save and load functionality"""
        file_path = tmp_path / "memory.json"
        basic_flow.memory.append(["Test1", "Test2"])
        basic_flow.save(file_path)

        new_flow = Agent(llm=basic_flow.llm, max_loops=5)
        new_flow.load(file_path)
        assert new_flow.memory == [["Test1", "Test2"]]

    def test_flow_call(self, basic_flow):
        """Test calling agent directly"""
        response = basic_flow("Test call")
        assert response == "Test call"

    def test_format_prompt(self, basic_flow):
        """Test prompt formatting"""
        formatted_prompt = basic_flow.format_prompt("Hello {name}", name="John")
        assert formatted_prompt == "Hello John"


# ============================================================================
# AGENT FEATURES TESTS
# ============================================================================


class TestAgentFeatures:
    """Test advanced agent features"""

    def test_basic_agent_functionality(self):
        """Test basic agent initialization and task execution"""
        print("\nTesting basic agent functionality...")

        agent = Agent(
            agent_name="Test-Agent", model_name="gpt-4.1", max_loops=1
        )

        response = agent.run("What is 2+2?")
        assert response is not None, "Agent response should not be None"

        # Test agent properties
        assert agent.agent_name == "Test-Agent", "Agent name not set correctly"
        assert agent.max_loops == 1, "Max loops not set correctly"
        assert agent.llm is not None, "LLM not initialized"

        print("✓ Basic agent functionality test passed")

    def test_memory_management(self):
        """Test agent memory management functionality"""
        print("\nTesting memory management...")

        agent = Agent(
            agent_name="Memory-Test-Agent",
            max_loops=1,
            model_name="gpt-4.1",
            context_length=8192,
        )

        # Test adding to memory
        agent.add_memory("Test memory entry")
        assert (
            "Test memory entry"
            in agent.short_memory.return_history_as_string()
        )

        # Test memory query
        agent.memory_query("Test query")

        # Test token counting
        tokens = agent.check_available_tokens()
        assert isinstance(tokens, int), "Token count should be an integer"

        print("✓ Memory management test passed")

    def test_agent_output_formats(self):
        """Test all available output formats"""
        print("\nTesting all output formats...")

        test_task = "Say hello!"

        output_types = {
            "str": str,
            "string": str,
            "list": str,  # JSON string containing list
            "json": str,  # JSON string
            "dict": dict,
            "yaml": str,
        }

        for output_type, expected_type in output_types.items():
            agent = Agent(
                agent_name=f"{output_type.capitalize()}-Output-Agent",
                model_name="gpt-4.1",
                max_loops=1,
                output_type=output_type,
            )

            response = agent.run(test_task)
            assert response is not None, f"{output_type} output should not be None"

            if output_type == "yaml":
                # Verify YAML can be parsed
                try:
                    yaml.safe_load(response)
                    print(f"✓ {output_type} output valid")
                except yaml.YAMLError:
                    assert False, f"Invalid YAML output for {output_type}"
            elif output_type in ["json", "list"]:
                # Verify JSON can be parsed
                try:
                    json.loads(response)
                    print(f"✓ {output_type} output valid")
                except json.JSONDecodeError:
                    assert False, f"Invalid JSON output for {output_type}"

        print("✓ Output formats test passed")

    def test_agent_state_management(self):
        """Test comprehensive state management functionality"""
        print("\nTesting state management...")

        # Create temporary directory for test files
        with tempfile.TemporaryDirectory() as temp_dir:
            state_path = os.path.join(temp_dir, "agent_state.json")

            # Create agent with initial state
            agent1 = Agent(
                agent_name="State-Test-Agent",
                model_name="gpt-4.1",
                max_loops=1,
                saved_state_path=state_path,
            )

            # Add some data to the agent
            agent1.run("Remember this: Test message 1")
            agent1.add_memory("Test message 2")

            # Save state
            agent1.save()
            assert os.path.exists(state_path), "State file not created"

            # Create new agent and load state
            agent2 = Agent(
                agent_name="State-Test-Agent",
                model_name="gpt-4.1",
                max_loops=1,
            )
            agent2.load(state_path)

            # Verify state loaded correctly
            history2 = agent2.short_memory.return_history_as_string()
            assert "Test message 1" in history2, "State not loaded correctly"
            assert "Test message 2" in history2, "Memory not loaded correctly"

            # Test autosave functionality
            agent3 = Agent(
                agent_name="Autosave-Test-Agent",
                model_name="gpt-4.1",
                max_loops=1,
                saved_state_path=os.path.join(temp_dir, "autosave_state.json"),
                autosave=True,
            )

            agent3.run("Test autosave")
            time.sleep(2)  # Wait for autosave
            assert os.path.exists(
                os.path.join(temp_dir, "autosave_state.json")
            ), "Autosave file not created"

        print("✓ State management test passed")

    def test_agent_tools_and_execution(self):
        """Test agent tool handling and execution"""
        print("\nTesting tools and execution...")

        def sample_tool(x: int, y: int) -> int:
            """Sample tool that adds two numbers"""
            return x + y

        agent = Agent(
            agent_name="Tools-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
            tools=[sample_tool],
        )

        # Test adding tools
        agent.add_tool(lambda x: x * 2)
        assert len(agent.tools) == 2, "Tool not added correctly"

        # Test removing tools
        agent.remove_tool(sample_tool)
        assert len(agent.tools) == 1, "Tool not removed correctly"

        # Test tool execution
        response = agent.run("Calculate 2 + 2 using the sample tool")
        assert response is not None, "Tool execution failed"

        print("✓ Tools and execution test passed")

    def test_agent_concurrent_execution(self):
        """Test agent concurrent execution capabilities"""
        print("\nTesting concurrent execution...")

        agent = Agent(
            agent_name="Concurrent-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
        )

        # Test bulk run
        tasks = [
            {"task": "Count to 3"},
            {"task": "Say hello"},
            {"task": "Tell a short joke"},
        ]

        responses = agent.bulk_run(tasks)
        assert len(responses) == len(tasks), "Not all tasks completed"
        assert all(
            response is not None for response in responses
        ), "Some tasks failed"

        # Test concurrent tasks
        concurrent_responses = agent.run_concurrent_tasks(
            ["Task 1", "Task 2", "Task 3"]
        )
        assert len(concurrent_responses) == 3, "Not all concurrent tasks completed"

        print("✓ Concurrent execution test passed")

    def test_agent_error_handling(self):
        """Test agent error handling and recovery"""
        print("\nTesting error handling...")

        agent = Agent(
            agent_name="Error-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
            retry_attempts=3,
            retry_interval=1,
        )

        # Test invalid tool execution
        try:
            agent.parse_and_execute_tools("invalid_json")
            print("✓ Invalid tool execution handled")
        except Exception:
            assert True, "Expected error caught"

        # Test recovery after error
        response = agent.run("Continue after error")
        assert response is not None, "Agent failed to recover after error"

        print("✓ Error handling test passed")

    def test_agent_configuration(self):
        """Test agent configuration and parameters"""
        print("\nTesting agent configuration...")

        agent = Agent(
            agent_name="Config-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
            temperature=0.7,
            max_tokens=4000,
            context_length=8192,
        )

        # Test configuration methods
        agent.update_system_prompt("New system prompt")
        agent.update_max_loops(2)
        agent.update_loop_interval(2)

        # Verify updates
        assert agent.max_loops == 2, "Max loops not updated"
        assert agent.loop_interval == 2, "Loop interval not updated"

        # Test configuration export
        config_dict = agent.to_dict()
        assert isinstance(config_dict, dict), "Configuration export failed"

        # Test YAML export
        yaml_config = agent.to_yaml()
        assert isinstance(yaml_config, str), "YAML export failed"

        print("✓ Configuration test passed")

    def test_agent_with_stopping_condition(self):
        """Test agent with custom stopping condition"""
        print("\nTesting agent with stopping condition...")

        def custom_stopping_condition(response: str) -> bool:
            return "STOP" in response.upper()

        agent = Agent(
            agent_name="Stopping-Condition-Agent",
            model_name="gpt-4.1",
            max_loops=1,
            stopping_condition=custom_stopping_condition,
        )

        response = agent.run("Count up until you see the word STOP")
        assert response is not None, "Stopping condition test failed"
        print("✓ Stopping condition test passed")

    def test_agent_with_retry_mechanism(self):
        """Test agent retry mechanism"""
        print("\nTesting agent retry mechanism...")

        agent = Agent(
            agent_name="Retry-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
            retry_attempts=3,
            retry_interval=1,
        )

        response = agent.run("Tell me a joke.")
        assert response is not None, "Retry mechanism test failed"
        print("✓ Retry mechanism test passed")

    def test_bulk_and_filtered_operations(self):
        """Test bulk operations and response filtering"""
        print("\nTesting bulk and filtered operations...")

        agent = Agent(
            agent_name="Bulk-Filter-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
        )

        # Test bulk run
        bulk_tasks = [
            {"task": "What is 2+2?"},
            {"task": "Name a color"},
            {"task": "Count to 3"},
        ]
        bulk_responses = agent.bulk_run(bulk_tasks)
        assert len(bulk_responses) == len(
            bulk_tasks
        ), "Bulk run should return same number of responses as tasks"

        # Test response filtering
        agent.add_response_filter("color")
        filtered_response = agent.filtered_run("What is your favorite color?")
        assert "[FILTERED]" in filtered_response, "Response filter not applied"

        print("✓ Bulk and filtered operations test passed")

    # Assumes the pytest-asyncio plugin is available so this coroutine test
    # is actually awaited by pytest instead of being collected and skipped.
    @pytest.mark.asyncio
    async def test_async_operations(self):
        """Test asynchronous operations"""
        print("\nTesting async operations...")

        agent = Agent(
            agent_name="Async-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
        )

        # Test single async run
        response = await agent.arun("What is 1+1?")
        assert response is not None, "Async run failed"

        # Test concurrent async runs
        tasks = ["Task 1", "Task 2", "Task 3"]
        responses = await asyncio.gather(*[agent.arun(task) for task in tasks])
        assert len(responses) == len(tasks), "Not all async tasks completed"

        print("✓ Async operations test passed")

    def test_memory_and_state_persistence(self):
        """Test memory management and state persistence"""
        print("\nTesting memory and state persistence...")

        with tempfile.TemporaryDirectory() as temp_dir:
            state_path = os.path.join(temp_dir, "test_state.json")

            # Create agent with memory configuration
            agent1 = Agent(
                agent_name="Memory-State-Test-Agent",
                model_name="gpt-4.1",
                max_loops=1,
                saved_state_path=state_path,
                context_length=8192,
                autosave=True,
            )

            # Test memory operations
            agent1.add_memory("Important fact: The sky is blue")
            agent1.memory_query("What color is the sky?")

            # Save state
            agent1.save()

            # Create new agent and load state
            agent2 = Agent(
                agent_name="Memory-State-Test-Agent",
                model_name="gpt-4.1",
                max_loops=1,
            )
            agent2.load(state_path)

            # Verify memory persistence
            memory_content = agent2.short_memory.return_history_as_string()
            assert "sky is blue" in memory_content, "Memory not properly persisted"

        print("✓ Memory and state persistence test passed")

    def test_sentiment_and_evaluation(self):
        """Test sentiment analysis and response evaluation"""
        print("\nTesting sentiment analysis and evaluation...")

        def mock_sentiment_analyzer(text):
            """Mock sentiment analyzer that returns a score between 0 and 1"""
            return 0.7 if "positive" in text.lower() else 0.3

        def mock_evaluator(response):
            """Mock evaluator that checks response quality"""
            return "GOOD" if len(response) > 10 else "BAD"

        agent = Agent(
            agent_name="Sentiment-Eval-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
            sentiment_analyzer=mock_sentiment_analyzer,
            sentiment_threshold=0.5,
            evaluator=mock_evaluator,
        )

        # Test sentiment analysis
        agent.run("Generate a positive message")

        # Test evaluation
        agent.run("Generate a detailed response")

        print("✓ Sentiment and evaluation test passed")

    def test_tool_management(self):
        """Test tool management functionality"""
        print("\nTesting tool management...")

        def tool1(x: int) -> int:
            """Sample tool 1"""
            return x * 2

        def tool2(x: int) -> int:
            """Sample tool 2"""
            return x + 2

        agent = Agent(
            agent_name="Tool-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
            tools=[tool1],
        )

        # Test adding tools
        agent.add_tool(tool2)
        assert len(agent.tools) == 2, "Tool not added correctly"

        # Test removing tools
        agent.remove_tool(tool1)
        assert len(agent.tools) == 1, "Tool not removed correctly"

        # Test adding multiple tools
        agent.add_tools([tool1, tool2])
        assert len(agent.tools) == 3, "Multiple tools not added correctly"

        print("✓ Tool management test passed")

    def test_system_prompt_and_configuration(self):
        """Test system prompt and configuration updates"""
        print("\nTesting system prompt and configuration...")

        agent = Agent(
            agent_name="Config-Test-Agent",
            model_name="gpt-4.1",
            max_loops=1,
        )

        # Test updating system prompt
        new_prompt = "You are a helpful assistant."
        agent.update_system_prompt(new_prompt)
        assert agent.system_prompt == new_prompt, "System prompt not updated"

        # Test configuration updates
        agent.update_max_loops(5)
        assert agent.max_loops == 5, "Max loops not updated"

        agent.update_loop_interval(2)
        assert agent.loop_interval == 2, "Loop interval not updated"

        # Test configuration export
        config_dict = agent.to_dict()
        assert isinstance(config_dict, dict), "Configuration export failed"

        print("✓ System prompt and configuration test passed")

    def test_agent_with_dynamic_temperature(self):
        """Test agent with dynamic temperature"""
        print("\nTesting agent with dynamic temperature...")

        agent = Agent(
            agent_name="Dynamic-Temp-Agent",
            model_name="gpt-4.1",
            max_loops=2,
            dynamic_temperature_enabled=True,
        )

        response = agent.run("Generate a creative story.")
        assert response is not None, "Dynamic temperature test failed"
        print("✓ Dynamic temperature test passed")


# ============================================================================
# AGENT LOGGING TESTS
# ============================================================================


class TestAgentLogging:
    """Test agent logging functionality"""

    def setup_method(self):
        """Set up test fixtures before each test"""
        self.mock_tokenizer = MagicMock()
        self.mock_tokenizer.count_tokens.return_value = 100

        self.mock_short_memory = MagicMock()
        self.mock_short_memory.get_memory_stats.return_value = {
            "message_count": 2
        }

        self.mock_long_memory = MagicMock()
        self.mock_long_memory.get_memory_stats.return_value = {
            "item_count": 5
        }

        self.agent = Agent(
            tokenizer=self.mock_tokenizer,
            short_memory=self.mock_short_memory,
            long_term_memory=self.mock_long_memory,
        )

    def test_log_step_metadata_basic(self):
        """Test basic step metadata logging"""
        log_result = self.agent.log_step_metadata(
            1, "Test prompt", "Test response"
        )

        assert "step_id" in log_result
        assert "timestamp" in log_result
        assert "tokens" in log_result
        assert "memory_usage" in log_result

        assert log_result["tokens"]["total"] == 200

    def test_log_step_metadata_no_long_term_memory(self):
        """Test step metadata logging without long term memory"""
        self.agent.long_term_memory = None
        log_result = self.agent.log_step_metadata(1, "prompt", "response")
        assert log_result["memory_usage"]["long_term"] == {}

    def test_log_step_metadata_timestamp(self):
        """Test step metadata logging timestamp"""
        log_result = self.agent.log_step_metadata(1, "prompt", "response")
        assert "timestamp" in log_result

    def test_token_counting_integration(self):
        """Test token counting integration"""
        self.mock_tokenizer.count_tokens.side_effect = [150, 250]
        log_result = self.agent.log_step_metadata(1, "prompt", "response")

        assert log_result["tokens"]["total"] == 400

    def test_agent_output_updating(self):
        """Test agent output updating"""
        initial_total_tokens = sum(
            step["tokens"]["total"]
            for step in self.agent.agent_output.steps
        )
        self.agent.log_step_metadata(1, "prompt", "response")

        final_total_tokens = sum(
            step["tokens"]["total"]
            for step in self.agent.agent_output.steps
        )
        assert final_total_tokens - initial_total_tokens == 200
        assert len(self.agent.agent_output.steps) == 1

    def test_full_logging_cycle(self):
        """Test full logging cycle"""
        agent = Agent(agent_name="test-agent")
        task = "Test task"
        max_loops = 1

        result = agent._run(task, max_loops=max_loops)

        assert isinstance(result, dict)
        assert "steps" in result
        assert isinstance(result["steps"], list)
        assert len(result["steps"]) == max_loops

        if result["steps"]:
            step = result["steps"][0]
            assert "step_id" in step
            assert "timestamp" in step
            assert "task" in step
            assert "response" in step
            assert step["task"] == task
            assert step["response"] == "Response for loop 1"

        assert len(self.agent.agent_output.steps) > 0


# ============================================================================
# YAML AGENT CREATION TESTS
# ============================================================================


class TestCreateAgentsFromYaml:
    """Test YAML agent creation functionality"""

    def setup_method(self):
        """Set up test fixtures before each test"""
        # Mock the environment variable for the API key
        os.environ["OPENAI_API_KEY"] = "fake-api-key"

        # Mock agent configuration YAML content
        self.valid_yaml_content = """
        agents:
          - agent_name: "Financial-Analysis-Agent"
            model:
              openai_api_key: "fake-api-key"
              model_name: "gpt-4o-mini"
              temperature: 0.1
              max_tokens: 2000
            system_prompt: "financial_agent_sys_prompt"
            max_loops: 1
            autosave: true
            dashboard: false
            verbose: true
            dynamic_temperature_enabled: true
            saved_state_path: "finance_agent.json"
            user_name: "swarms_corp"
            retry_attempts: 1
            context_length: 200000
            return_step_meta: false
            output_type: "str"
            task: "How can I establish a ROTH IRA to buy stocks and get a tax break?"
        """

    @patch(
        "builtins.open",
        new_callable=unittest.mock.mock_open,
        read_data="",
    )
    @patch("yaml.safe_load")
    def test_create_agents_return_agents(self, mock_safe_load, mock_open):
        """Test creating agents from YAML and returning agents"""
        # Mock YAML content parsing
        mock_safe_load.return_value = {
            "agents": [
                {
                    "agent_name": "Financial-Analysis-Agent",
                    "model": {
                        "openai_api_key": "fake-api-key",
                        "model_name": "gpt-4o-mini",
                        "temperature": 0.1,
                        "max_tokens": 2000,
                    },
                    "system_prompt": "financial_agent_sys_prompt",
                    "max_loops": 1,
                    "autosave": True,
                    "dashboard": False,
                    "verbose": True,
                    "dynamic_temperature_enabled": True,
                    "saved_state_path": "finance_agent.json",
                    "user_name": "swarms_corp",
                    "retry_attempts": 1,
                    "context_length": 200000,
                    "return_step_meta": False,
                    "output_type": "str",
                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
                }
            ]
        }

        # Test if agents are returned correctly
        agents = create_agents_from_yaml(
            "fake_yaml_path.yaml", return_type="agents"
        )
        assert len(agents) == 1
        assert agents[0].agent_name == "Financial-Analysis-Agent"

    @patch(
        "builtins.open",
        new_callable=unittest.mock.mock_open,
        read_data="",
    )
    @patch("yaml.safe_load")
    @patch("swarms.Agent.run", return_value="Task completed successfully")
    def test_create_agents_return_tasks(
        self, mock_agent_run, mock_safe_load, mock_open
    ):
        """Test creating agents from YAML and returning task results"""
        # Mock YAML content parsing
        mock_safe_load.return_value = {
            "agents": [
                {
                    "agent_name": "Financial-Analysis-Agent",
                    "model": {
                        "openai_api_key": "fake-api-key",
                        "model_name": "gpt-4o-mini",
                        "temperature": 0.1,
                        "max_tokens": 2000,
                    },
                    "system_prompt": "financial_agent_sys_prompt",
                    "max_loops": 1,
                    "autosave": True,
                    "dashboard": False,
                    "verbose": True,
                    "dynamic_temperature_enabled": True,
                    "saved_state_path": "finance_agent.json",
                    "user_name": "swarms_corp",
                    "retry_attempts": 1,
                    "context_length": 200000,
                    "return_step_meta": False,
                    "output_type": "str",
                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
                }
            ]
        }

        # Test if tasks are executed and results are returned
        task_results = create_agents_from_yaml(
            "fake_yaml_path.yaml", return_type="tasks"
        )
        assert len(task_results) == 1
        assert task_results[0]["agent_name"] == "Financial-Analysis-Agent"
        assert task_results[0]["output"] is not None

    @patch(
        "builtins.open",
        new_callable=unittest.mock.mock_open,
        read_data="",
    )
    @patch("yaml.safe_load")
    def test_create_agents_return_both(self, mock_safe_load, mock_open):
        """Test creating agents from YAML and returning both agents and tasks"""
        # Mock YAML content parsing
        mock_safe_load.return_value = {
            "agents": [
                {
                    "agent_name": "Financial-Analysis-Agent",
                    "model": {
                        "openai_api_key": "fake-api-key",
                        "model_name": "gpt-4o-mini",
                        "temperature": 0.1,
                        "max_tokens": 2000,
                    },
                    "system_prompt": "financial_agent_sys_prompt",
                    "max_loops": 1,
                    "autosave": True,
                    "dashboard": False,
                    "verbose": True,
                    "dynamic_temperature_enabled": True,
                    "saved_state_path": "finance_agent.json",
                    "user_name": "swarms_corp",
                    "retry_attempts": 1,
                    "context_length": 200000,
                    "return_step_meta": False,
                    "output_type": "str",
                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
                }
            ]
        }

        # Test if both agents and tasks are returned
        agents, task_results = create_agents_from_yaml(
            "fake_yaml_path.yaml", return_type="both"
        )
        assert len(agents) == 1
        assert len(task_results) == 1
        assert agents[0].agent_name == "Financial-Analysis-Agent"
        assert task_results[0]["output"] is not None

    @patch(
        "builtins.open",
        new_callable=unittest.mock.mock_open,
        read_data="",
    )
    @patch("yaml.safe_load")
    def test_missing_agents_in_yaml(self, mock_safe_load, mock_open):
        """Test handling missing agents in YAML"""
        # Mock YAML content with missing "agents" key
        mock_safe_load.return_value = {}

        # The function should raise an error for a missing "agents" key
        with pytest.raises(ValueError) as context:
            create_agents_from_yaml(
                "fake_yaml_path.yaml", return_type="agents"
            )
        assert (
            "The YAML configuration does not contain 'agents'."
            in str(context.value)
        )

    @patch(
        "builtins.open",
        new_callable=unittest.mock.mock_open,
        read_data="",
    )
    @patch("yaml.safe_load")
    def test_invalid_return_type(self, mock_safe_load, mock_open):
        """Test handling invalid return type"""
        # Mock YAML content parsing
        mock_safe_load.return_value = {
            "agents": [
                {
                    "agent_name": "Financial-Analysis-Agent",
                    "model": {
                        "openai_api_key": "fake-api-key",
                        "model_name": "gpt-4o-mini",
                        "temperature": 0.1,
                        "max_tokens": 2000,
                    },
                    "system_prompt": "financial_agent_sys_prompt",
                    "max_loops": 1,
                    "autosave": True,
                    "dashboard": False,
                    "verbose": True,
                    "dynamic_temperature_enabled": True,
                    "saved_state_path": "finance_agent.json",
                    "user_name": "swarms_corp",
                    "retry_attempts": 1,
                    "context_length": 200000,
                    "return_step_meta": False,
                    "output_type": "str",
                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
                }
            ]
        }

        # An error should be raised for an invalid return_type
        with pytest.raises(ValueError) as context:
            create_agents_from_yaml(
                "fake_yaml_path.yaml", return_type="invalid_type"
            )
        assert "Invalid return_type" in str(context.value)


# ============================================================================
# BENCHMARK TESTS
# ============================================================================


class TestAgentBenchmark:
    """Test agent benchmarking functionality"""

    def test_benchmark_multiple_agents(self):
        """Test benchmarking multiple agents"""
        console = Console()
        init_times = []
        memory_readings = []
        process = psutil.Process(os.getpid())

        # Create benchmark tables
        time_table = Table(title="Time Statistics")
        time_table.add_column("Metric", style="cyan")
        time_table.add_column("Value", style="green")

        memory_table = Table(title="Memory Statistics")
        memory_table.add_column("Metric", style="cyan")
        memory_table.add_column("Value", style="green")

        initial_memory = process.memory_info().rss / 1024
        start_total_time = time.perf_counter()

        # Initialize agents and measure performance
        num_agents = 10  # Reduced for testing
        for i in range(num_agents):
            start_time = time.perf_counter()

            Agent(
                agent_name=f"Financial-Analysis-Agent-{i}",
                agent_description="Personal finance advisor agent",
                max_loops=2,
                model_name="gpt-4o-mini",
                dynamic_temperature_enabled=True,
                interactive=False,
            )

            init_time = (time.perf_counter() - start_time) * 1000
            init_times.append(init_time)

            current_memory = process.memory_info().rss / 1024
            memory_readings.append(current_memory - initial_memory)

            if (i + 1) % 5 == 0:
                console.print(f"Created {i + 1} agents...", style="bold blue")

        total_time_ms = (time.perf_counter() - start_total_time) * 1000
        console.print(
            f"Total agent creation time: {total_time_ms:.2f} ms",
            style="bold green",
        )

        # Calculate statistics
        time_stats = self._get_time_stats(init_times)
        memory_stats = self._get_memory_stats(memory_readings)

        # Verify basic statistics
        assert len(init_times) == num_agents
        assert len(memory_readings) == num_agents
        assert time_stats["mean"] > 0
        assert memory_stats["mean"] >= 0

        print("✓ Benchmark test passed")

    def _get_memory_stats(self, memory_readings):
        """Calculate memory statistics"""
        return {
            "peak": max(memory_readings) if memory_readings else 0,
            "min": min(memory_readings) if memory_readings else 0,
            "mean": mean(memory_readings) if memory_readings else 0,
            "median": median(memory_readings) if memory_readings else 0,
            "stdev": (
                stdev(memory_readings) if len(memory_readings) > 1 else 0
            ),
            "variance": (
                variance(memory_readings) if len(memory_readings) > 1 else 0
            ),
        }

    def _get_time_stats(self, times):
        """Calculate time statistics"""
        return {
            "total": sum(times),
            "mean": mean(times) if times else 0,
            "median": median(times) if times else 0,
            "min": min(times) if times else 0,
            "max": max(times) if times else 0,
            "stdev": stdev(times) if len(times) > 1 else 0,
            "variance": variance(times) if len(times) > 1 else 0,
        }


# ============================================================================
# TOOL USAGE TESTS
# ============================================================================


class TestAgentToolUsage:
    """Test comprehensive tool usage functionality for agents"""

    def test_normal_callable_tools(self):
        """Test normal callable tools (functions, lambdas, methods)"""
        print("\nTesting normal callable tools...")

        def math_tool(x: int, y: int) -> int:
            """Add two numbers together"""
            return x + y

        def string_tool(text: str) -> str:
            """Convert text to uppercase"""
            return text.upper()

        def list_tool(items: list) -> int:
            """Count items in a list"""
            return len(items)

        # Test with individual function tools
        agent = Agent(
            agent_name="Callable-Tools-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[math_tool, string_tool, list_tool],
        )

        # Test tool addition
        assert len(agent.tools) == 3, "Tools not added correctly"

        # Test tool execution
        response = agent.run("Use the math tool to add 5 and 3")
        assert response is not None, "Tool execution failed"

        # Test lambda tools
        def lambda_tool(x):
            return x * 2

        agent.add_tool(lambda_tool)
        assert len(agent.tools) == 4, "Lambda tool not added correctly"

        # Test method tools
        class MathOperations:
            def multiply(self, x: int, y: int) -> int:
                """Multiply two numbers"""
                return x * y

        math_ops = MathOperations()
        agent.add_tool(math_ops.multiply)
        assert len(agent.tools) == 5, "Method tool not added correctly"

        print("✓ Normal callable tools test passed")

    def test_tool_management_operations(self):
        """Test tool management operations (add, remove, list)"""
        print("\nTesting tool management operations...")

        def tool1(x: int) -> int:
            """Tool 1"""
            return x + 1

        def tool2(x: int) -> int:
            """Tool 2"""
            return x * 2

        def tool3(x: int) -> int:
            """Tool 3"""
            return x - 1

        agent = Agent(
            agent_name="Tool-Management-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[tool1, tool2],
        )

        # Test initial tools
        assert len(agent.tools) == 2, "Initial tools not set correctly"

        # Test adding single tool
        agent.add_tool(tool3)
        assert len(agent.tools) == 3, "Single tool addition failed"

        # Test adding multiple tools
        def tool4(x: int) -> int:
            return x**2

        def tool5(x: int) -> int:
            return x // 2

        agent.add_tools([tool4, tool5])
        assert len(agent.tools) == 5, "Multiple tools addition failed"

        # Test removing single tool
        agent.remove_tool(tool1)
        assert len(agent.tools) == 4, "Single tool removal failed"

        # Test removing multiple tools
        agent.remove_tools([tool2, tool3])
        assert len(agent.tools) == 2, "Multiple tools removal failed"

        print("✓ Tool management operations test passed")

    def test_mcp_single_url_tools(self):
        """Test MCP single URL tools"""
        print("\nTesting MCP single URL tools...")

        # Mock MCP URL for testing
        mock_mcp_url = "http://localhost:8000/mcp"

        with patch(
            "swarms.structs.agent.get_mcp_tools_sync"
        ) as mock_get_tools:
            # Mock MCP tools response
            mock_tools = [
                {
                    "type": "function",
                    "function": {
                        "name": "mcp_calculator",
                        "description": "Perform calculations",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "expression": {
                                    "type": "string",
                                    "description": "Math expression",
                                }
                            },
                            "required": ["expression"],
                        },
                    },
                },
                {
                    "type": "function",
                    "function": {
                        "name": "mcp_weather",
                        "description": "Get weather information",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "location": {
                                    "type": "string",
                                    "description": "City name",
                                }
                            },
                            "required": ["location"],
                        },
                    },
                },
            ]
            mock_get_tools.return_value = mock_tools

            agent = Agent(
                agent_name="MCP-Single-URL-Test-Agent",
                model_name="gpt-4o-mini",
                max_loops=1,
                mcp_url=mock_mcp_url,
                verbose=True,
            )

            # Test MCP tools integration
            tools = agent.add_mcp_tools_to_memory()
            assert len(tools) == 2, "MCP tools not loaded correctly"
            assert mock_get_tools.called, "MCP tools function not called"

            # Verify tool structure
            assert "mcp_calculator" in str(tools), "Calculator tool not found"
            assert "mcp_weather" in str(tools), "Weather tool not found"

        print("✓ MCP single URL tools test passed")

    def test_mcp_multiple_urls_tools(self):
        """Test MCP multiple URLs tools"""
        print("\nTesting MCP multiple URLs tools...")

        # Mock multiple MCP URLs for testing
        mock_mcp_urls = [
            "http://localhost:8000/mcp1",
            "http://localhost:8000/mcp2",
            "http://localhost:8000/mcp3",
        ]

        with patch(
            "swarms.structs.agent.get_tools_for_multiple_mcp_servers"
        ) as mock_get_tools:
            # Mock MCP tools response from multiple servers
            mock_tools = [
                {
                    "type": "function",
                    "function": {
                        "name": "server1_tool",
                        "description": "Tool from server 1",
                        "parameters": {
                            "type": "object",
                            "properties": {"input": {"type": "string"}},
                        },
                    },
                },
                {
                    "type": "function",
                    "function": {
                        "name": "server2_tool",
                        "description": "Tool from server 2",
                        "parameters": {
                            "type": "object",
                            "properties": {"data": {"type": "string"}},
                        },
                    },
                },
                {
                    "type": "function",
                    "function": {
                        "name": "server3_tool",
                        "description": "Tool from server 3",
                        "parameters": {
                            "type": "object",
                            "properties": {"query": {"type": "string"}},
                        },
                    },
                },
            ]
            mock_get_tools.return_value = mock_tools

            agent = Agent(
                agent_name="MCP-Multiple-URLs-Test-Agent",
                model_name="gpt-4o-mini",
                max_loops=1,
                mcp_urls=mock_mcp_urls,
                verbose=True,
            )

            # Test MCP tools integration from multiple servers
            tools = agent.add_mcp_tools_to_memory()
            assert (
                len(tools) == 3
            ), "MCP tools from multiple servers not loaded correctly"
            assert mock_get_tools.called, "MCP multiple tools function not called"

            # Verify tools from different servers
            tools_str = str(tools)
            assert "server1_tool" in tools_str, "Server 1 tool not found"
            assert "server2_tool" in tools_str, "Server 2 tool not found"
            assert "server3_tool" in tools_str, "Server 3 tool not found"

        print("✓ MCP multiple URLs tools test passed")

    def test_base_tool_class_tools(self):
        """Test BaseTool class tools"""
        print("\nTesting BaseTool class tools...")

        from swarms.tools.base_tool import BaseTool

        def sample_function(x: int, y: int) -> int:
            """Sample function for testing"""
            return x + y

        # Create BaseTool instance
        base_tool = BaseTool(
            verbose=True,
            tools=[sample_function],
            tool_system_prompt="You are a helpful tool assistant",
        )

        # Test tool schema generation
        schema = base_tool.func_to_dict(sample_function)
        assert isinstance(schema, dict), "Tool schema not generated correctly"
        assert "name" in schema, "Tool name not in schema"
        assert "description" in schema, "Tool description not in schema"
        assert "parameters" in schema, "Tool parameters not in schema"

        # Test tool execution
        test_input = {"x": 5, "y": 3}
        result = base_tool.execute_tool(test_input)
        assert result is not None, "Tool execution failed"

        print("✓ BaseTool class tools test passed")

    def test_tool_execution_and_error_handling(self):
        """Test tool execution and error handling"""
        print("\nTesting tool execution and error handling...")

        def valid_tool(x: int) -> int:
            """Valid tool that works correctly"""
            return x * 2

        def error_tool(x: int) -> int:
            """Tool that raises an error"""
            raise ValueError("Test error")

        def type_error_tool(x: str) -> str:
            """Tool with type error"""
            return x.upper()

        agent = Agent(
            agent_name="Tool-Execution-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[valid_tool, error_tool, type_error_tool],
        )

        # Test valid tool execution
        response = agent.run("Use the valid tool with input 5")
        assert response is not None, "Valid tool execution failed"

        # Test error handling
        try:
            agent.run("Use the error tool")
            # Should handle error gracefully
        except Exception:
            # Expected to handle errors gracefully
            pass

        print("✓ Tool execution and error handling test passed")

    def test_tool_schema_generation(self):
        """Test tool schema generation and validation"""
        print("\nTesting tool schema generation...")

        def complex_tool(
            name: str,
            age: int,
            email: str = None,
            is_active: bool = True,
        ) -> dict:
            """Complex tool with various parameter types"""
            return {
                "name": name,
                "age": age,
                "email": email,
                "is_active": is_active,
            }

        agent = Agent(
            agent_name="Tool-Schema-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[complex_tool],
        )

        # Test that tools are properly registered
        assert len(agent.tools) == 1, "Tool not registered correctly"

        # Test tool execution with complex parameters
        response = agent.run(
            "Use the complex tool with name 'John', age 30, email 'john@example.com'"
        )
        assert response is not None, "Complex tool execution failed"

        print("✓ Tool schema generation test passed")

    def test_aop_tools(self):
        """Test AOP (Agent Operations) tools"""
        print("\nTesting AOP tools...")

        from swarms.structs.aop import AOP

        # Create test agents
        agent1 = Agent(
            agent_name="AOP-Agent-1",
            model_name="gpt-4o-mini",
            max_loops=1,
        )

        agent2 = Agent(
            agent_name="AOP-Agent-2",
            model_name="gpt-4o-mini",
            max_loops=1,
        )

        # Create AOP instance
        aop = AOP(
            server_name="test-aop-server",
            verbose=True,
        )

        # Test adding agents as tools
        tool_names = aop.add_agents_batch(
            agents=[agent1, agent2],
            tool_names=["math_agent", "text_agent"],
            tool_descriptions=[
                "Performs mathematical operations",
                "Handles text processing",
            ],
        )

        assert len(tool_names) == 2, "AOP agents not added as tools correctly"
        assert "math_agent" in tool_names, "Math agent tool not created"
        assert "text_agent" in tool_names, "Text agent tool not created"

        # Test tool discovery
        tools = aop.get_available_tools()
        assert len(tools) >= 2, "AOP tools not discovered correctly"

        print("✓ AOP tools test passed")

    def test_tool_choice_and_execution_modes(self):
        """Test different tool choice and execution modes"""
        print("\nTesting tool choice and execution modes...")

        def tool_a(x: int) -> int:
            """Tool A"""
            return x + 1

        def tool_b(x: int) -> int:
            """Tool B"""
            return x * 2

        # Test with auto tool choice
        agent_auto = Agent(
            agent_name="Auto-Tool-Choice-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[tool_a, tool_b],
            tool_choice="auto",
        )

        response_auto = agent_auto.run(
            "Calculate something using the available tools"
        )
        assert response_auto is not None, "Auto tool choice failed"

        # Test with specific tool choice
        agent_specific = Agent(
            agent_name="Specific-Tool-Choice-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[tool_a, tool_b],
            tool_choice="tool_a",
        )

        response_specific = agent_specific.run("Use tool_a with input 5")
        assert response_specific is not None, "Specific tool choice failed"

        # Test with tool execution enabled/disabled
        agent_execute = Agent(
            agent_name="Tool-Execute-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[tool_a, tool_b],
            execute_tool=True,
        )

        response_execute = agent_execute.run("Execute a tool")
        assert response_execute is not None, "Tool execution mode failed"

        print("✓ Tool choice and execution modes test passed")

    def test_tool_system_prompts(self):
        """Test tool system prompts and custom tool prompts"""
        print("\nTesting tool system prompts...")

        def calculator_tool(expression: str) -> str:
            """Calculate mathematical expressions"""
            try:
                result = eval(expression)
                return str(result)
            except Exception:
                return "Invalid expression"

        custom_tool_prompt = "You have access to a calculator tool. Use it for mathematical calculations."

        agent = Agent(
            agent_name="Tool-Prompt-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[calculator_tool],
            tool_system_prompt=custom_tool_prompt,
        )

        # Test that custom tool prompt is set
        assert (
            agent.tool_system_prompt == custom_tool_prompt
        ), "Custom tool prompt not set"

        # Test tool execution with custom prompt
        response = agent.run("Calculate 2 + 2 * 3")
        assert response is not None, "Tool execution with custom prompt failed"

        print("✓ Tool system prompts test passed")

    def test_tool_parallel_execution(self):
        """Test parallel tool execution capabilities"""
        print("\nTesting parallel tool execution...")

        def slow_tool(x: int) -> int:
            """Slow tool that takes time"""
            import time

            time.sleep(0.1)  # Simulate slow operation
            return x * 2

        def fast_tool(x: int) -> int:
            """Fast tool"""
            return x + 1

        agent = Agent(
            agent_name="Parallel-Tool-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[slow_tool, fast_tool],
        )

        # Test parallel tool execution
        start_time = time.time()
        response = agent.run("Use both tools with input 5")
        end_time = time.time()

        assert response is not None, "Parallel tool execution failed"
        # Should be faster than sequential execution
        assert (end_time - start_time) < 0.5, "Parallel execution took too long"

        print("✓ Parallel tool execution test passed")

    def test_tool_validation_and_type_checking(self):
        """Test tool validation and type checking"""
        print("\nTesting tool validation and type checking...")

        def typed_tool(x: int, y: str, z: bool = False) -> dict:
            """Tool with specific type hints"""
            return {"x": x, "y": y, "z": z, "result": f"{x} {y} {z}"}

        agent = Agent(
            agent_name="Tool-Validation-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[typed_tool],
        )

        # Test tool execution with correct types
        response = agent.run("Use typed_tool with x=5, y='hello', z=True")
        assert response is not None, "Typed tool execution failed"

        # Test tool execution with incorrect types (should handle gracefully)
        try:
            agent.run("Use typed_tool with incorrect types")
        except Exception:
            # Expected to handle type errors gracefully
            pass

        print("✓ Tool validation and type checking test passed")

    def test_tool_caching_and_performance(self):
        """Test tool caching and performance optimization"""
        print("\nTesting tool caching and performance...")

        call_count = 0

        def cached_tool(x: int) -> int:
            """Tool that should be cached"""
            nonlocal call_count
            call_count += 1
            return x**2

        agent = Agent(
            agent_name="Tool-Caching-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[cached_tool],
        )

        # Test multiple calls to the same tool
        agent.run("Use cached_tool with input 5")
        agent.run("Use cached_tool with input 5 again")

        # Verify tool was called (caching behavior may vary)
        assert call_count >= 1, "Tool not called at least once"

        print("✓ Tool caching and performance test passed")

def test_tool_error_recovery(self):
|
|
"""Test tool error recovery and fallback mechanisms"""
|
|
print("\nTesting tool error recovery...")
|
|
|
|
def unreliable_tool(x: int) -> int:
|
|
"""Tool that sometimes fails"""
|
|
import random
|
|
|
|
if random.random() < 0.5:
|
|
raise Exception("Random failure")
|
|
return x * 2
|
|
|
|
def fallback_tool(x: int) -> int:
|
|
"""Fallback tool"""
|
|
return x + 10
|
|
|
|
agent = Agent(
|
|
agent_name="Tool-Recovery-Test-Agent",
|
|
model_name="gpt-4o-mini",
|
|
max_loops=1,
|
|
tools=[unreliable_tool, fallback_tool],
|
|
retry_attempts=3,
|
|
)
|
|
|
|
# Test error recovery
|
|
response = agent.run("Use unreliable_tool with input 5")
|
|
assert response is not None, "Tool error recovery failed"
|
|
|
|
print("✓ Tool error recovery test passed")
|
|
|
|
def test_tool_with_different_output_types(self):
|
|
"""Test tools with different output types"""
|
|
print("\nTesting tools with different output types...")
|
|
|
|
def json_tool(data: dict) -> str:
|
|
"""Tool that returns JSON string"""
|
|
import json
|
|
|
|
return json.dumps(data)
|
|
|
|
def yaml_tool(data: dict) -> str:
|
|
"""Tool that returns YAML string"""
|
|
import yaml
|
|
|
|
return yaml.dump(data)
|
|
|
|
def dict_tool(x: int) -> dict:
|
|
"""Tool that returns dictionary"""
|
|
return {"value": x, "squared": x**2}
|
|
|
|
agent = Agent(
|
|
agent_name="Output-Types-Test-Agent",
|
|
model_name="gpt-4o-mini",
|
|
max_loops=1,
|
|
tools=[json_tool, yaml_tool, dict_tool],
|
|
)
|
|
|
|
# Test JSON tool
|
|
response = agent.run(
|
|
"Use json_tool with data {'name': 'test', 'value': 123}"
|
|
)
|
|
assert response is not None, "JSON tool execution failed"
|
|
|
|
# Test YAML tool
|
|
response = agent.run(
|
|
"Use yaml_tool with data {'key': 'value'}"
|
|
)
|
|
assert response is not None, "YAML tool execution failed"
|
|
|
|
# Test dict tool
|
|
response = agent.run("Use dict_tool with input 5")
|
|
assert response is not None, "Dict tool execution failed"
|
|
|
|
print("✓ Tools with different output types test passed")
|
|
|
|
def test_tool_with_async_execution(self):
|
|
"""Test tools with async execution"""
|
|
print("\nTesting tools with async execution...")
|
|
|
|
async def async_tool(x: int) -> int:
|
|
"""Async tool that performs async operation"""
|
|
import asyncio
|
|
|
|
await asyncio.sleep(0.01) # Simulate async operation
|
|
return x * 2
|
|
|
|
def sync_tool(x: int) -> int:
|
|
"""Sync tool"""
|
|
return x + 1
|
|
|
|
agent = Agent(
|
|
agent_name="Async-Tool-Test-Agent",
|
|
model_name="gpt-4o-mini",
|
|
max_loops=1,
|
|
tools=[
|
|
sync_tool
|
|
], # Note: async tools need special handling
|
|
)
|
|
|
|
# Test sync tool execution
|
|
response = agent.run("Use sync_tool with input 5")
|
|
assert response is not None, "Sync tool execution failed"
|
|
|
|
print("✓ Tools with async execution test passed")
|
|
|
|
def test_tool_with_file_operations(self):
|
|
"""Test tools that perform file operations"""
|
|
print("\nTesting tools with file operations...")
|
|
|
|
import os
|
|
import tempfile
|
|
|
|
def file_writer_tool(filename: str, content: str) -> str:
|
|
"""Tool that writes content to a file"""
|
|
with open(filename, "w") as f:
|
|
f.write(content)
|
|
return f"Written {len(content)} characters to {filename}"
|
|
|
|
def file_reader_tool(filename: str) -> str:
|
|
"""Tool that reads content from a file"""
|
|
try:
|
|
with open(filename, "r") as f:
|
|
return f.read()
|
|
except FileNotFoundError:
|
|
return "File not found"
|
|
|
|
with tempfile.TemporaryDirectory() as temp_dir:
|
|
test_file = os.path.join(temp_dir, "test.txt")
|
|
|
|
agent = Agent(
|
|
agent_name="File-Ops-Test-Agent",
|
|
model_name="gpt-4o-mini",
|
|
max_loops=1,
|
|
tools=[file_writer_tool, file_reader_tool],
|
|
)
|
|
|
|
# Test file writing
|
|
response = agent.run(
|
|
f"Use file_writer_tool to write 'Hello World' to {test_file}"
|
|
)
|
|
assert (
|
|
response is not None
|
|
), "File writing tool execution failed"
|
|
|
|
# Test file reading
|
|
response = agent.run(
|
|
f"Use file_reader_tool to read from {test_file}"
|
|
)
|
|
assert (
|
|
response is not None
|
|
), "File reading tool execution failed"
|
|
|
|
print("✓ Tools with file operations test passed")
|
|
|
|
def test_tool_with_network_operations(self):
|
|
"""Test tools that perform network operations"""
|
|
print("\nTesting tools with network operations...")
|
|
|
|
def url_tool(url: str) -> str:
|
|
"""Tool that processes URLs"""
|
|
return f"Processing URL: {url}"
|
|
|
|
def api_tool(endpoint: str, method: str = "GET") -> str:
|
|
"""Tool that simulates API calls"""
|
|
return f"API {method} request to {endpoint}"
|
|
|
|
agent = Agent(
|
|
agent_name="Network-Ops-Test-Agent",
|
|
model_name="gpt-4o-mini",
|
|
max_loops=1,
|
|
tools=[url_tool, api_tool],
|
|
)
|
|
|
|
# Test URL tool
|
|
response = agent.run(
|
|
"Use url_tool with 'https://example.com'"
|
|
)
|
|
assert response is not None, "URL tool execution failed"
|
|
|
|
# Test API tool
|
|
response = agent.run(
|
|
"Use api_tool with endpoint '/api/data' and method 'POST'"
|
|
)
|
|
assert response is not None, "API tool execution failed"
|
|
|
|
print("✓ Tools with network operations test passed")
|
|
|
|
def test_tool_with_database_operations(self):
|
|
"""Test tools that perform database operations"""
|
|
print("\nTesting tools with database operations...")
|
|
|
|
def db_query_tool(query: str) -> str:
|
|
"""Tool that simulates database queries"""
|
|
return f"Executed query: {query}"
|
|
|
|
def db_insert_tool(table: str, data: dict) -> str:
|
|
"""Tool that simulates database inserts"""
|
|
return f"Inserted data into {table}: {data}"
|
|
|
|
agent = Agent(
|
|
agent_name="Database-Ops-Test-Agent",
|
|
model_name="gpt-4o-mini",
|
|
max_loops=1,
|
|
tools=[db_query_tool, db_insert_tool],
|
|
)
|
|
|
|
# Test database query
|
|
response = agent.run(
|
|
"Use db_query_tool with 'SELECT * FROM users'"
|
|
)
|
|
assert (
|
|
response is not None
|
|
), "Database query tool execution failed"
|
|
|
|
# Test database insert
|
|
response = agent.run(
|
|
"Use db_insert_tool with table 'users' and data {'name': 'John'}"
|
|
)
|
|
assert (
|
|
response is not None
|
|
), "Database insert tool execution failed"
|
|
|
|
print("✓ Tools with database operations test passed")
|
|
|
|
def test_tool_with_machine_learning_operations(self):
|
|
"""Test tools that perform ML operations"""
|
|
print("\nTesting tools with ML operations...")
|
|
|
|
def predict_tool(features: list) -> str:
|
|
"""Tool that simulates ML predictions"""
|
|
return f"Prediction for features {features}: 0.85"
|
|
|
|
def train_tool(model_name: str, data_size: int) -> str:
|
|
"""Tool that simulates model training"""
|
|
return f"Trained {model_name} with {data_size} samples"
|
|
|
|
agent = Agent(
|
|
agent_name="ML-Ops-Test-Agent",
|
|
model_name="gpt-4o-mini",
|
|
max_loops=1,
|
|
tools=[predict_tool, train_tool],
|
|
)
|
|
|
|
# Test ML prediction
|
|
response = agent.run(
|
|
"Use predict_tool with features [1, 2, 3, 4]"
|
|
)
|
|
assert (
|
|
response is not None
|
|
), "ML prediction tool execution failed"
|
|
|
|
# Test ML training
|
|
response = agent.run(
|
|
"Use train_tool with model 'random_forest' and data_size 1000"
|
|
)
|
|
assert (
|
|
response is not None
|
|
), "ML training tool execution failed"
|
|
|
|
print("✓ Tools with ML operations test passed")
|
|
|
|

    def test_tool_with_image_processing(self):
        """Test tools that perform image processing"""
        print("\nTesting tools with image processing...")

        def resize_tool(
            image_path: str, width: int, height: int
        ) -> str:
            """Tool that simulates image resizing"""
            return f"Resized {image_path} to {width}x{height}"

        def filter_tool(image_path: str, filter_type: str) -> str:
            """Tool that simulates image filtering"""
            return f"Applied {filter_type} filter to {image_path}"

        agent = Agent(
            agent_name="Image-Processing-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[resize_tool, filter_tool],
        )

        # Test image resizing
        response = agent.run(
            "Use resize_tool with image 'test.jpg', width 800, height 600"
        )
        assert (
            response is not None
        ), "Image resize tool execution failed"

        # Test image filtering
        response = agent.run(
            "Use filter_tool with image 'test.jpg' and filter 'blur'"
        )
        assert (
            response is not None
        ), "Image filter tool execution failed"

        print("✓ Tools with image processing test passed")

    def test_tool_with_text_processing(self):
        """Test tools that perform text processing"""
        print("\nTesting tools with text processing...")

        def tokenize_tool(text: str) -> list:
            """Tool that tokenizes text"""
            return text.split()

        def translate_tool(text: str, target_lang: str) -> str:
            """Tool that simulates translation"""
            return f"Translated '{text}' to {target_lang}"

        def sentiment_tool(text: str) -> str:
            """Tool that simulates sentiment analysis"""
            return f"Sentiment of '{text}': positive"

        agent = Agent(
            agent_name="Text-Processing-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[tokenize_tool, translate_tool, sentiment_tool],
        )

        # Test text tokenization
        response = agent.run(
            "Use tokenize_tool with 'Hello world this is a test'"
        )
        assert (
            response is not None
        ), "Text tokenization tool execution failed"

        # Test translation
        response = agent.run(
            "Use translate_tool with 'Hello' and target_lang 'Spanish'"
        )
        assert (
            response is not None
        ), "Translation tool execution failed"

        # Test sentiment analysis
        response = agent.run(
            "Use sentiment_tool with 'I love this product!'"
        )
        assert (
            response is not None
        ), "Sentiment analysis tool execution failed"

        print("✓ Tools with text processing test passed")

    def test_tool_with_mathematical_operations(self):
        """Test tools that perform mathematical operations"""
        print("\nTesting tools with mathematical operations...")

        def matrix_multiply_tool(
            matrix_a: list, matrix_b: list
        ) -> list:
            """Tool that multiplies two 2x2 matrices"""
            # Simple 2x2 matrix multiplication
            result = [[0, 0], [0, 0]]
            for i in range(2):
                for j in range(2):
                    for k in range(2):
                        result[i][j] += (
                            matrix_a[i][k] * matrix_b[k][j]
                        )
            return result

        def statistics_tool(data: list) -> dict:
            """Tool that calculates basic statistics"""
            return {
                "mean": sum(data) / len(data),
                "max": max(data),
                "min": min(data),
                "count": len(data),
            }

        def calculus_tool(function: str, x: float) -> str:
            """Tool that simulates calculus operations"""
            return f"Derivative of {function} at x={x}: 2*x"

        agent = Agent(
            agent_name="Math-Ops-Test-Agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            tools=[
                matrix_multiply_tool,
                statistics_tool,
                calculus_tool,
            ],
        )

        # Test matrix multiplication
        response = agent.run(
            "Use matrix_multiply_tool with [[1,2],[3,4]] and [[5,6],[7,8]]"
        )
        assert (
            response is not None
        ), "Matrix multiplication tool execution failed"

        # Test statistics
        response = agent.run(
            "Use statistics_tool with [1, 2, 3, 4, 5]"
        )
        assert (
            response is not None
        ), "Statistics tool execution failed"

        # Test calculus
        response = agent.run("Use calculus_tool with 'x^2' and x=3")
        assert response is not None, "Calculus tool execution failed"

        print("✓ Tools with mathematical operations test passed")


# ============================================================================
# LLM ARGS AND HANDLING TESTS
# ============================================================================
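# Note on the parameters exercised below: these tests treat `llm_args` as a
# plain dict of provider-specific options (e.g. Azure's `api_version`) that
# is preserved on the agent, and `tools_list_dictionary` as an OpenAI-style
# function-schema list. A minimal, illustrative sketch (placeholder values,
# not a working configuration):
#
#     agent = Agent(
#         agent_name="example-agent",
#         model_name="gpt-4o-mini",
#         llm_args={"api_version": "2024-02-15-preview"},
#         tools_list_dictionary=[{"type": "function", "function": {...}}],
#     )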

class TestLLMArgsAndHandling:
    """Test LLM arguments and handling functionality"""

    def test_combined_llm_args(self):
        """Test that llm_args, tools_list_dictionary, and MCP tools can be combined."""
        print("\nTesting combined LLM args...")

        # Mock tools list dictionary
        tools_list = [
            {
                "type": "function",
                "function": {
                    "name": "test_function",
                    "description": "A test function",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "test_param": {
                                "type": "string",
                                "description": "A test parameter",
                            }
                        },
                    },
                },
            }
        ]

        # Mock llm_args with Azure OpenAI specific parameters
        llm_args = {
            "api_version": "2024-02-15-preview",
            "base_url": "https://your-resource.openai.azure.com/",
            "api_key": "your-api-key",
        }

        try:
            # Test 1: Only llm_args
            print("Testing Agent with only llm_args...")
            Agent(
                agent_name="test-agent-1",
                model_name="gpt-4o-mini",
                llm_args=llm_args,
            )
            print("✓ Agent with only llm_args created successfully")

            # Test 2: Only tools_list_dictionary
            print("Testing Agent with only tools_list_dictionary...")
            Agent(
                agent_name="test-agent-2",
                model_name="gpt-4o-mini",
                tools_list_dictionary=tools_list,
            )
            print(
                "✓ Agent with only tools_list_dictionary created successfully"
            )

            # Test 3: Combined llm_args and tools_list_dictionary
            print(
                "Testing Agent with combined llm_args and tools_list_dictionary..."
            )
            agent3 = Agent(
                agent_name="test-agent-3",
                model_name="gpt-4o-mini",
                llm_args=llm_args,
                tools_list_dictionary=tools_list,
            )
            print(
                "✓ Agent with combined llm_args and tools_list_dictionary created successfully"
            )

            # Test 4: Verify that the LLM instance has the correct configuration
            print("Verifying LLM configuration...")

            # Check that agent3 has both llm_args and tools configured
            assert (
                agent3.llm_args == llm_args
            ), "llm_args not preserved"
            assert (
                agent3.tools_list_dictionary == tools_list
            ), "tools_list_dictionary not preserved"

            # Check that the LLM instance was created
            assert agent3.llm is not None, "LLM instance not created"

            print("✓ LLM configuration verified successfully")
            print("✓ Combined LLM args test passed")

        except Exception as e:
            print(f"✗ Combined LLM args test failed: {e}")
            raise

    def test_azure_openai_example(self):
        """Test the Azure OpenAI example with api_version parameter."""
        print("\nTesting Azure OpenAI example with api_version...")

        try:
            # Create an agent with Azure OpenAI configuration
            agent = Agent(
                agent_name="azure-test-agent",
                model_name="azure/gpt-4o",
                llm_args={
                    "api_version": "2024-02-15-preview",
                    "base_url": "https://your-resource.openai.azure.com/",
                    "api_key": "your-api-key",
                },
                tools_list_dictionary=[
                    {
                        "type": "function",
                        "function": {
                            "name": "get_weather",
                            "description": "Get weather information",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "location": {
                                        "type": "string",
                                        "description": "The city and state",
                                    }
                                },
                            },
                        },
                    }
                ],
            )

            print(
                "✓ Azure OpenAI agent with combined parameters created successfully"
            )

            # Verify configuration
            assert agent.llm_args is not None, "llm_args not set"
            assert (
                "api_version" in agent.llm_args
            ), "api_version not in llm_args"
            assert (
                agent.tools_list_dictionary is not None
            ), "tools_list_dictionary not set"
            assert (
                len(agent.tools_list_dictionary) > 0
            ), "tools_list_dictionary is empty"

            print("✓ Azure OpenAI configuration verified")
            print("✓ Azure OpenAI example test passed")

        except Exception as e:
            print(f"✗ Azure OpenAI test failed: {e}")
            raise

    def test_llm_handling_args_kwargs(self):
        """Test that llm_handling properly handles both args and kwargs."""
        print("\nTesting LLM handling args and kwargs...")

        # Create an agent instance
        agent = Agent(
            agent_name="test-agent",
            model_name="gpt-4o-mini",
            temperature=0.7,
            max_tokens=1000,
        )

        # Test 1: Call llm_handling with kwargs
        print("Test 1: Testing kwargs handling...")
        try:
            # This should work and add the kwargs to additional_args
            agent.llm_handling(top_p=0.9, frequency_penalty=0.1)
            print("✓ kwargs handling works")
        except Exception as e:
            print(f"✗ kwargs handling failed: {e}")
            raise

        # Test 2: Call llm_handling with args (dictionary)
        print("Test 2: Testing args handling with dictionary...")
        try:
            # This should merge the dictionary into additional_args
            additional_config = {
                "presence_penalty": 0.2,
                "logit_bias": {"123": 1},
            }
            agent.llm_handling(additional_config)
            print("✓ args handling with dictionary works")
        except Exception as e:
            print(f"✗ args handling with dictionary failed: {e}")
            raise

        # Test 3: Call llm_handling with both args and kwargs
        print("Test 3: Testing both args and kwargs...")
        try:
            # This should handle both
            additional_config = {"presence_penalty": 0.3}
            agent.llm_handling(
                additional_config, top_p=0.8, frequency_penalty=0.2
            )
            print("✓ combined args and kwargs handling works")
        except Exception as e:
            print(f"✗ combined args and kwargs handling failed: {e}")
            raise

        # Test 4: Call llm_handling with non-dictionary args
        print("Test 4: Testing non-dictionary args...")
        try:
            # This should store args under 'additional_args' key
            agent.llm_handling(
                "some_string", 123, ["list", "of", "items"]
            )
            print("✓ non-dictionary args handling works")
        except Exception as e:
            print(f"✗ non-dictionary args handling failed: {e}")
            raise

        print("✓ LLM handling args and kwargs test passed")


# ============================================================================
# MAIN TEST RUNNER
# ============================================================================


def run_all_tests():
    """Run every test_* method on each test class and print a summary."""
    print("Starting Merged Agent Test Suite...\n")

    # Test classes to run
    test_classes = [
        TestBasicAgent,
        TestAgentFeatures,
        TestAgentLogging,
        TestCreateAgentsFromYaml,
        TestAgentBenchmark,
        TestAgentToolUsage,
        TestLLMArgsAndHandling,
    ]

    total_tests = 0
    passed_tests = 0
    failed_tests = 0

    for test_class in test_classes:
        print(f"\n{'='*50}")
        print(f"Running {test_class.__name__}")
        print(f"{'='*50}")

        # Create test instance
        test_instance = test_class()

        # Get all test methods
        test_methods = [
            method
            for method in dir(test_instance)
            if method.startswith("test_")
        ]

        for test_method in test_methods:
            total_tests += 1
            try:
                # Run the test method
                getattr(test_instance, test_method)()
                passed_tests += 1
                print(f"✓ {test_method}")
            except Exception as e:
                failed_tests += 1
                print(f"✗ {test_method}: {str(e)}")

    # Print summary
    print(f"\n{'='*50}")
    print("Test Summary")
    print(f"{'='*50}")
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {failed_tests}")
    print(f"Success Rate: {(passed_tests/total_tests)*100:.2f}%")

    return {
        "total": total_tests,
        "passed": passed_tests,
        "failed": failed_tests,
        "success_rate": (passed_tests / total_tests) * 100,
    }
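
# Illustrative use of the summary dict returned by run_all_tests() (a sketch
# only; `sys` is not imported in this module):
#
#     results = run_all_tests()
#     sys.exit(1 if results["failed"] else 0)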


if __name__ == "__main__":
    # Run all tests
    results = run_all_tests()

    print(results)