import asyncio
import json
import os
import tempfile
import time
import unittest
from statistics import mean, median, stdev, variance
from unittest.mock import MagicMock, patch

import psutil
import pytest
import yaml
from dotenv import load_dotenv
from rich.console import Console
from rich.table import Table

from swarms import (
    Agent,
    create_agents_from_yaml,
)

# Load environment variables
load_dotenv()

# Global test configuration
openai_api_key = os.getenv("OPENAI_API_KEY")

# ============================================================================
# FIXTURES AND UTILITIES
# ============================================================================


@pytest.fixture
def mocked_llm():
    """Echoing mock LLM used by the flow fixtures below.

    Assumed stand-in for a real model: it simply returns the task text,
    which is what test_flow_call's equality check expects.
    """
    llm = MagicMock()
    llm.run.side_effect = lambda task, *args, **kwargs: task
    return llm


@pytest.fixture
def basic_flow(mocked_llm):
    """Basic agent flow for testing"""
    # max_loops=5 matches the value asserted in test_flow_initialization
    # and the reload in test_save_and_load
    return Agent(llm=mocked_llm, max_loops=5)


@pytest.fixture
def flow_with_condition(mocked_llm):
    """Agent flow with stopping condition"""
    from swarms.structs.agent import stop_when_repeats

    return Agent(
        llm=mocked_llm,
        max_loops=1,
        stopping_condition=stop_when_repeats,
    )


@pytest.fixture
def mock_agents():
    """Mock agents for testing"""

    class MockAgent:
        def __init__(self, name):
            self.name = name
            self.agent_name = name

        def run(self, task, img=None, *args, **kwargs):
            return f"{self.name} processed {task}"

    return [
        MockAgent(name="Agent1"),
        MockAgent(name="Agent2"),
        MockAgent(name="Agent3"),
    ]


@pytest.fixture
def test_agent():
    """Create a real agent for testing"""
    with patch("swarms.structs.agent.LiteLLM") as mock_llm:
        mock_llm.return_value.run.return_value = "Test response"
        return Agent(
            agent_name="test_agent",
            agent_description="A test agent",
            system_prompt="You are a test agent",
            model_name="gpt-4o-mini",
            max_loops=1,
            verbose=False,
            print_on=False,
        )


# ============================================================================
# BASIC AGENT TESTS
# ============================================================================


class TestBasicAgent:
    """Test basic agent functionality"""

    def test_stop_when_repeats(self):
        """Test stopping condition function"""
        from swarms.structs.agent import stop_when_repeats

        assert stop_when_repeats("Please Stop now")
        assert not stop_when_repeats("Continue the process")

    def test_flow_initialization(self, basic_flow):
        """Test agent initialization"""
        assert basic_flow.max_loops == 5
        assert basic_flow.stopping_condition is None
        assert basic_flow.loop_interval == 1
        assert basic_flow.retry_attempts == 3
        assert basic_flow.retry_interval == 1
        assert basic_flow.feedback == []
        assert basic_flow.memory == []
        assert basic_flow.task is None
        assert basic_flow.stopping_token == ""
        assert not basic_flow.interactive

    def test_provide_feedback(self, basic_flow):
        """Test feedback functionality"""
        feedback = "Test feedback"
        basic_flow.provide_feedback(feedback)
        assert feedback in basic_flow.feedback

    @patch("time.sleep", return_value=None)
    def test_run_without_stopping_condition(
        self, mocked_sleep, basic_flow
    ):
        """Test running without stopping condition"""
        response = basic_flow.run("Test task")
        assert response is not None

    @patch("time.sleep", return_value=None)
    def test_run_with_stopping_condition(
        self, mocked_sleep, flow_with_condition
    ):
        """Test running with stopping condition"""
        response = flow_with_condition.run("Stop")
        assert response is not None

    def test_bulk_run(self, basic_flow):
        """Test bulk run functionality"""
        inputs = [{"task": "Test1"}, {"task": "Test2"}]
        responses = basic_flow.bulk_run(inputs)
        assert responses is not None

    def test_save_and_load(self, basic_flow, tmp_path):
        """Test save and load functionality"""
        file_path = tmp_path / "memory.json"
        basic_flow.memory.append(["Test1", "Test2"])
        basic_flow.save(file_path)

        new_flow = Agent(llm=basic_flow.llm, max_loops=5)
        new_flow.load(file_path)
        assert new_flow.memory == [["Test1", "Test2"]]

    def test_flow_call(self, basic_flow):
        """Test calling agent directly"""
        response = basic_flow("Test call")
        assert response == "Test call"

    def test_format_prompt(self, basic_flow):
        """Test prompt formatting"""
        formatted_prompt = basic_flow.format_prompt(
            "Hello {name}", name="John"
        )
        assert formatted_prompt == "Hello John"


# ============================================================================
# AGENT FEATURES TESTS
# ============================================================================


class TestAgentFeatures:
    """Test advanced agent features"""

    def test_basic_agent_functionality(self):
        """Test basic agent initialization and task execution"""
        print("\nTesting basic agent functionality...")

        agent = Agent(
            agent_name="Test-Agent", model_name="gpt-4.1", max_loops=1
        )

        response = agent.run("What is 2+2?")
        assert (
            response is not None
        ), "Agent response should not be None"

        # Test agent properties
        assert (
            agent.agent_name == "Test-Agent"
        ), "Agent name not set correctly"
        assert agent.max_loops == 1, "Max loops not set correctly"
        assert agent.llm is not None, "LLM not initialized"

        print("✓ Basic agent functionality test passed")

    def test_memory_management(self):
        """Test agent memory management functionality"""
        print("\nTesting memory management...")

        agent = Agent(
            agent_name="Memory-Test-Agent",
            max_loops=1,
            model_name="gpt-4.1",
            context_length=8192,
        )

        # Test adding to memory
        agent.add_memory("Test memory entry")
        assert (
            "Test memory entry"
            in agent.short_memory.return_history_as_string()
        )

        # Test memory query
        agent.memory_query("Test query")

        # Test token counting
        tokens = agent.check_available_tokens()
        assert isinstance(
            tokens, int
        ), "Token count should be an integer"

        print("✓ Memory management test passed")

    def test_agent_output_formats(self):
        """Test all available output formats"""
        print("\nTesting all output formats...")

        test_task = "Say hello!"
output_types = { "str": str, "string": str, "list": str, # JSON string containing list "json": str, # JSON string "dict": dict, "yaml": str, } for output_type, expected_type in output_types.items(): agent = Agent( agent_name=f"{output_type.capitalize()}-Output-Agent", model_name="gpt-4.1", max_loops=1, output_type=output_type, ) response = agent.run(test_task) assert ( response is not None ), f"{output_type} output should not be None" if output_type == "yaml": # Verify YAML can be parsed try: yaml.safe_load(response) print(f"✓ {output_type} output valid") except yaml.YAMLError: assert ( False ), f"Invalid YAML output for {output_type}" elif output_type in ["json", "list"]: # Verify JSON can be parsed try: json.loads(response) print(f"✓ {output_type} output valid") except json.JSONDecodeError: assert ( False ), f"Invalid JSON output for {output_type}" print("✓ Output formats test passed") def test_agent_state_management(self): """Test comprehensive state management functionality""" print("\nTesting state management...") # Create temporary directory for test files with tempfile.TemporaryDirectory() as temp_dir: state_path = os.path.join(temp_dir, "agent_state.json") # Create agent with initial state agent1 = Agent( agent_name="State-Test-Agent", model_name="gpt-4.1", max_loops=1, saved_state_path=state_path, ) # Add some data to the agent agent1.run("Remember this: Test message 1") agent1.add_memory("Test message 2") # Save state agent1.save() assert os.path.exists( state_path ), "State file not created" # Create new agent and load state agent2 = Agent( agent_name="State-Test-Agent", model_name="gpt-4.1", max_loops=1, ) agent2.load(state_path) # Verify state loaded correctly history2 = agent2.short_memory.return_history_as_string() assert ( "Test message 1" in history2 ), "State not loaded correctly" assert ( "Test message 2" in history2 ), "Memory not loaded correctly" # Test autosave functionality agent3 = Agent( agent_name="Autosave-Test-Agent", model_name="gpt-4.1", max_loops=1, saved_state_path=os.path.join( temp_dir, "autosave_state.json" ), autosave=True, ) agent3.run("Test autosave") time.sleep(2) # Wait for autosave assert os.path.exists( os.path.join(temp_dir, "autosave_state.json") ), "Autosave file not created" print("✓ State management test passed") def test_agent_tools_and_execution(self): """Test agent tool handling and execution""" print("\nTesting tools and execution...") def sample_tool(x: int, y: int) -> int: """Sample tool that adds two numbers""" return x + y agent = Agent( agent_name="Tools-Test-Agent", model_name="gpt-4.1", max_loops=1, tools=[sample_tool], ) # Test adding tools agent.add_tool(lambda x: x * 2) assert len(agent.tools) == 2, "Tool not added correctly" # Test removing tools agent.remove_tool(sample_tool) assert len(agent.tools) == 1, "Tool not removed correctly" # Test tool execution response = agent.run("Calculate 2 + 2 using the sample tool") assert response is not None, "Tool execution failed" print("✓ Tools and execution test passed") def test_agent_concurrent_execution(self): """Test agent concurrent execution capabilities""" print("\nTesting concurrent execution...") agent = Agent( agent_name="Concurrent-Test-Agent", model_name="gpt-4.1", max_loops=1, ) # Test bulk run tasks = [ {"task": "Count to 3"}, {"task": "Say hello"}, {"task": "Tell a short joke"}, ] responses = agent.bulk_run(tasks) assert len(responses) == len(tasks), "Not all tasks completed" assert all( response is not None for response in responses ), "Some tasks failed" # Test 
concurrent tasks concurrent_responses = agent.run_concurrent_tasks( ["Task 1", "Task 2", "Task 3"] ) assert ( len(concurrent_responses) == 3 ), "Not all concurrent tasks completed" print("✓ Concurrent execution test passed") def test_agent_error_handling(self): """Test agent error handling and recovery""" print("\nTesting error handling...") agent = Agent( agent_name="Error-Test-Agent", model_name="gpt-4.1", max_loops=1, retry_attempts=3, retry_interval=1, ) # Test invalid tool execution try: agent.parse_and_execute_tools("invalid_json") print("✓ Invalid tool execution handled") except Exception: assert True, "Expected error caught" # Test recovery after error response = agent.run("Continue after error") assert ( response is not None ), "Agent failed to recover after error" print("✓ Error handling test passed") def test_agent_configuration(self): """Test agent configuration and parameters""" print("\nTesting agent configuration...") agent = Agent( agent_name="Config-Test-Agent", model_name="gpt-4.1", max_loops=1, temperature=0.7, max_tokens=4000, context_length=8192, ) # Test configuration methods agent.update_system_prompt("New system prompt") agent.update_max_loops(2) agent.update_loop_interval(2) # Verify updates assert agent.max_loops == 2, "Max loops not updated" assert agent.loop_interval == 2, "Loop interval not updated" # Test configuration export config_dict = agent.to_dict() assert isinstance( config_dict, dict ), "Configuration export failed" # Test YAML export yaml_config = agent.to_yaml() assert isinstance(yaml_config, str), "YAML export failed" print("✓ Configuration test passed") def test_agent_with_stopping_condition(self): """Test agent with custom stopping condition""" print("\nTesting agent with stopping condition...") def custom_stopping_condition(response: str) -> bool: return "STOP" in response.upper() agent = Agent( agent_name="Stopping-Condition-Agent", model_name="gpt-4.1", max_loops=1, stopping_condition=custom_stopping_condition, ) response = agent.run("Count up until you see the word STOP") assert response is not None, "Stopping condition test failed" print("✓ Stopping condition test passed") def test_agent_with_retry_mechanism(self): """Test agent retry mechanism""" print("\nTesting agent retry mechanism...") agent = Agent( agent_name="Retry-Test-Agent", model_name="gpt-4.1", max_loops=1, retry_attempts=3, retry_interval=1, ) response = agent.run("Tell me a joke.") assert response is not None, "Retry mechanism test failed" print("✓ Retry mechanism test passed") def test_bulk_and_filtered_operations(self): """Test bulk operations and response filtering""" print("\nTesting bulk and filtered operations...") agent = Agent( agent_name="Bulk-Filter-Test-Agent", model_name="gpt-4.1", max_loops=1, ) # Test bulk run bulk_tasks = [ {"task": "What is 2+2?"}, {"task": "Name a color"}, {"task": "Count to 3"}, ] bulk_responses = agent.bulk_run(bulk_tasks) assert len(bulk_responses) == len( bulk_tasks ), "Bulk run should return same number of responses as tasks" # Test response filtering agent.add_response_filter("color") filtered_response = agent.filtered_run( "What is your favorite color?" 
) assert ( "[FILTERED]" in filtered_response ), "Response filter not applied" print("✓ Bulk and filtered operations test passed") async def test_async_operations(self): """Test asynchronous operations""" print("\nTesting async operations...") agent = Agent( agent_name="Async-Test-Agent", model_name="gpt-4.1", max_loops=1, ) # Test single async run response = await agent.arun("What is 1+1?") assert response is not None, "Async run failed" # Test concurrent async runs tasks = ["Task 1", "Task 2", "Task 3"] responses = await asyncio.gather( *[agent.arun(task) for task in tasks] ) assert len(responses) == len( tasks ), "Not all async tasks completed" print("✓ Async operations test passed") def test_memory_and_state_persistence(self): """Test memory management and state persistence""" print("\nTesting memory and state persistence...") with tempfile.TemporaryDirectory() as temp_dir: state_path = os.path.join(temp_dir, "test_state.json") # Create agent with memory configuration agent1 = Agent( agent_name="Memory-State-Test-Agent", model_name="gpt-4.1", max_loops=1, saved_state_path=state_path, context_length=8192, autosave=True, ) # Test memory operations agent1.add_memory("Important fact: The sky is blue") agent1.memory_query("What color is the sky?") # Save state agent1.save() # Create new agent and load state agent2 = Agent( agent_name="Memory-State-Test-Agent", model_name="gpt-4.1", max_loops=1, ) agent2.load(state_path) # Verify memory persistence memory_content = ( agent2.short_memory.return_history_as_string() ) assert ( "sky is blue" in memory_content ), "Memory not properly persisted" print("✓ Memory and state persistence test passed") def test_sentiment_and_evaluation(self): """Test sentiment analysis and response evaluation""" print("\nTesting sentiment analysis and evaluation...") def mock_sentiment_analyzer(text): """Mock sentiment analyzer that returns a score between 0 and 1""" return 0.7 if "positive" in text.lower() else 0.3 def mock_evaluator(response): """Mock evaluator that checks response quality""" return "GOOD" if len(response) > 10 else "BAD" agent = Agent( agent_name="Sentiment-Eval-Test-Agent", model_name="gpt-4.1", max_loops=1, sentiment_analyzer=mock_sentiment_analyzer, sentiment_threshold=0.5, evaluator=mock_evaluator, ) # Test sentiment analysis agent.run("Generate a positive message") # Test evaluation agent.run("Generate a detailed response") print("✓ Sentiment and evaluation test passed") def test_tool_management(self): """Test tool management functionality""" print("\nTesting tool management...") def tool1(x: int) -> int: """Sample tool 1""" return x * 2 def tool2(x: int) -> int: """Sample tool 2""" return x + 2 agent = Agent( agent_name="Tool-Test-Agent", model_name="gpt-4.1", max_loops=1, tools=[tool1], ) # Test adding tools agent.add_tool(tool2) assert len(agent.tools) == 2, "Tool not added correctly" # Test removing tools agent.remove_tool(tool1) assert len(agent.tools) == 1, "Tool not removed correctly" # Test adding multiple tools agent.add_tools([tool1, tool2]) assert ( len(agent.tools) == 3 ), "Multiple tools not added correctly" print("✓ Tool management test passed") def test_system_prompt_and_configuration(self): """Test system prompt and configuration updates""" print("\nTesting system prompt and configuration...") agent = Agent( agent_name="Config-Test-Agent", model_name="gpt-4.1", max_loops=1, ) # Test updating system prompt new_prompt = "You are a helpful assistant." 
agent.update_system_prompt(new_prompt) assert ( agent.system_prompt == new_prompt ), "System prompt not updated" # Test configuration updates agent.update_max_loops(5) assert agent.max_loops == 5, "Max loops not updated" agent.update_loop_interval(2) assert agent.loop_interval == 2, "Loop interval not updated" # Test configuration export config_dict = agent.to_dict() assert isinstance( config_dict, dict ), "Configuration export failed" print("✓ System prompt and configuration test passed") def test_agent_with_dynamic_temperature(self): """Test agent with dynamic temperature""" print("\nTesting agent with dynamic temperature...") agent = Agent( agent_name="Dynamic-Temp-Agent", model_name="gpt-4.1", max_loops=2, dynamic_temperature_enabled=True, ) response = agent.run("Generate a creative story.") assert response is not None, "Dynamic temperature test failed" print("✓ Dynamic temperature test passed") # ============================================================================ # AGENT LOGGING TESTS # ============================================================================ class TestAgentLogging: """Test agent logging functionality""" def setUp(self): """Set up test fixtures""" self.mock_tokenizer = MagicMock() self.mock_tokenizer.count_tokens.return_value = 100 self.mock_short_memory = MagicMock() self.mock_short_memory.get_memory_stats.return_value = { "message_count": 2 } self.mock_long_memory = MagicMock() self.mock_long_memory.get_memory_stats.return_value = { "item_count": 5 } self.agent = Agent( tokenizer=self.mock_tokenizer, short_memory=self.mock_short_memory, long_term_memory=self.mock_long_memory, ) def test_log_step_metadata_basic(self): """Test basic step metadata logging""" log_result = self.agent.log_step_metadata( 1, "Test prompt", "Test response" ) assert "step_id" in log_result assert "timestamp" in log_result assert "tokens" in log_result assert "memory_usage" in log_result assert log_result["tokens"]["total"] == 200 def test_log_step_metadata_no_long_term_memory(self): """Test step metadata logging without long term memory""" self.agent.long_term_memory = None log_result = self.agent.log_step_metadata( 1, "prompt", "response" ) assert log_result["memory_usage"]["long_term"] == {} def test_log_step_metadata_timestamp(self): """Test step metadata logging timestamp""" log_result = self.agent.log_step_metadata( 1, "prompt", "response" ) assert "timestamp" in log_result def test_token_counting_integration(self): """Test token counting integration""" self.mock_tokenizer.count_tokens.side_effect = [150, 250] log_result = self.agent.log_step_metadata( 1, "prompt", "response" ) assert log_result["tokens"]["total"] == 400 def test_agent_output_updating(self): """Test agent output updating""" initial_total_tokens = sum( step["tokens"]["total"] for step in self.agent.agent_output.steps ) self.agent.log_step_metadata(1, "prompt", "response") final_total_tokens = sum( step["tokens"]["total"] for step in self.agent.agent_output.steps ) assert final_total_tokens - initial_total_tokens == 200 assert len(self.agent.agent_output.steps) == 1 def test_full_logging_cycle(self): """Test full logging cycle""" agent = Agent(agent_name="test-agent") task = "Test task" max_loops = 1 result = agent._run(task, max_loops=max_loops) assert isinstance(result, dict) assert "steps" in result assert isinstance(result["steps"], list) assert len(result["steps"]) == max_loops if result["steps"]: step = result["steps"][0] assert "step_id" in step assert "timestamp" in step assert "task" in step assert 
"response" in step assert step["task"] == task assert step["response"] == "Response for loop 1" assert len(self.agent.agent_output.steps) > 0 # ============================================================================ # YAML AGENT CREATION TESTS # ============================================================================ class TestCreateAgentsFromYaml: """Test YAML agent creation functionality""" def setUp(self): """Set up test fixtures""" # Mock the environment variable for API key os.environ["OPENAI_API_KEY"] = "fake-api-key" # Mock agent configuration YAML content self.valid_yaml_content = """ agents: - agent_name: "Financial-Analysis-Agent" model: openai_api_key: "fake-api-key" model_name: "gpt-4o-mini" temperature: 0.1 max_tokens: 2000 system_prompt: "financial_agent_sys_prompt" max_loops: 1 autosave: true dashboard: false verbose: true dynamic_temperature_enabled: true saved_state_path: "finance_agent.json" user_name: "swarms_corp" retry_attempts: 1 context_length: 200000 return_step_meta: false output_type: "str" task: "How can I establish a ROTH IRA to buy stocks and get a tax break?" """ @patch( "builtins.open", new_callable=unittest.mock.mock_open, read_data="", ) @patch("yaml.safe_load") def test_create_agents_return_agents( self, mock_safe_load, mock_open ): """Test creating agents from YAML and returning agents""" # Mock YAML content parsing mock_safe_load.return_value = { "agents": [ { "agent_name": "Financial-Analysis-Agent", "model": { "openai_api_key": "fake-api-key", "model_name": "gpt-4o-mini", "temperature": 0.1, "max_tokens": 2000, }, "system_prompt": "financial_agent_sys_prompt", "max_loops": 1, "autosave": True, "dashboard": False, "verbose": True, "dynamic_temperature_enabled": True, "saved_state_path": "finance_agent.json", "user_name": "swarms_corp", "retry_attempts": 1, "context_length": 200000, "return_step_meta": False, "output_type": "str", "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?", } ] } # Test if agents are returned correctly agents = create_agents_from_yaml( "fake_yaml_path.yaml", return_type="agents" ) assert len(agents) == 1 assert agents[0].agent_name == "Financial-Analysis-Agent" @patch( "builtins.open", new_callable=unittest.mock.mock_open, read_data="", ) @patch("yaml.safe_load") @patch( "swarms.Agent.run", return_value="Task completed successfully" ) def test_create_agents_return_tasks( self, mock_agent_run, mock_safe_load, mock_open ): """Test creating agents from YAML and returning task results""" # Mock YAML content parsing mock_safe_load.return_value = { "agents": [ { "agent_name": "Financial-Analysis-Agent", "model": { "openai_api_key": "fake-api-key", "model_name": "gpt-4o-mini", "temperature": 0.1, "max_tokens": 2000, }, "system_prompt": "financial_agent_sys_prompt", "max_loops": 1, "autosave": True, "dashboard": False, "verbose": True, "dynamic_temperature_enabled": True, "saved_state_path": "finance_agent.json", "user_name": "swarms_corp", "retry_attempts": 1, "context_length": 200000, "return_step_meta": False, "output_type": "str", "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?", } ] } # Test if tasks are executed and results are returned task_results = create_agents_from_yaml( "fake_yaml_path.yaml", return_type="tasks" ) assert len(task_results) == 1 assert ( task_results[0]["agent_name"] == "Financial-Analysis-Agent" ) assert task_results[0]["output"] is not None @patch( "builtins.open", new_callable=unittest.mock.mock_open, read_data="", ) @patch("yaml.safe_load") def 
test_create_agents_return_both(
        self, mock_safe_load, mock_open
    ):
        """Test creating agents from YAML and returning both agents and tasks"""
        # Mock YAML content parsing
        mock_safe_load.return_value = {
            "agents": [
                {
                    "agent_name": "Financial-Analysis-Agent",
                    "model": {
                        "openai_api_key": "fake-api-key",
                        "model_name": "gpt-4o-mini",
                        "temperature": 0.1,
                        "max_tokens": 2000,
                    },
                    "system_prompt": "financial_agent_sys_prompt",
                    "max_loops": 1,
                    "autosave": True,
                    "dashboard": False,
                    "verbose": True,
                    "dynamic_temperature_enabled": True,
                    "saved_state_path": "finance_agent.json",
                    "user_name": "swarms_corp",
                    "retry_attempts": 1,
                    "context_length": 200000,
                    "return_step_meta": False,
                    "output_type": "str",
                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
                }
            ]
        }

        # Test if both agents and tasks are returned
        agents, task_results = create_agents_from_yaml(
            "fake_yaml_path.yaml", return_type="both"
        )
        assert len(agents) == 1
        assert len(task_results) == 1
        assert agents[0].agent_name == "Financial-Analysis-Agent"
        assert task_results[0]["output"] is not None

    @patch(
        "builtins.open",
        new_callable=unittest.mock.mock_open,
        read_data="",
    )
    @patch("yaml.safe_load")
    def test_missing_agents_in_yaml(self, mock_safe_load, mock_open):
        """Test handling missing agents in YAML"""
        # Mock YAML content with missing "agents" key
        mock_safe_load.return_value = {}

        # Test if the function raises an error for missing "agents" key
        with pytest.raises(ValueError) as context:
            create_agents_from_yaml(
                "fake_yaml_path.yaml", return_type="agents"
            )
        assert (
            "The YAML configuration does not contain 'agents'."
            in str(context.value)
        )

    @patch(
        "builtins.open",
        new_callable=unittest.mock.mock_open,
        read_data="",
    )
    @patch("yaml.safe_load")
    def test_invalid_return_type(self, mock_safe_load, mock_open):
        """Test handling invalid return type"""
        # Mock YAML content parsing
        mock_safe_load.return_value = {
            "agents": [
                {
                    "agent_name": "Financial-Analysis-Agent",
                    "model": {
                        "openai_api_key": "fake-api-key",
                        "model_name": "gpt-4o-mini",
                        "temperature": 0.1,
                        "max_tokens": 2000,
                    },
                    "system_prompt": "financial_agent_sys_prompt",
                    "max_loops": 1,
                    "autosave": True,
                    "dashboard": False,
                    "verbose": True,
                    "dynamic_temperature_enabled": True,
                    "saved_state_path": "finance_agent.json",
                    "user_name": "swarms_corp",
                    "retry_attempts": 1,
                    "context_length": 200000,
                    "return_step_meta": False,
                    "output_type": "str",
                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
                }
            ]
        }

        # Test if an error is raised for invalid return_type
        with pytest.raises(ValueError) as context:
            create_agents_from_yaml(
                "fake_yaml_path.yaml", return_type="invalid_type"
            )
        assert "Invalid return_type" in str(context.value)


# ============================================================================
# BENCHMARK TESTS
# ============================================================================


class TestAgentBenchmark:
    """Test agent benchmarking functionality"""

    def test_benchmark_multiple_agents(self):
        """Test benchmarking multiple agents"""
        console = Console()
        init_times = []
        memory_readings = []
        process = psutil.Process(os.getpid())

        # Create benchmark tables
        time_table = Table(title="Time Statistics")
        time_table.add_column("Metric", style="cyan")
        time_table.add_column("Value", style="green")

        memory_table = Table(title="Memory Statistics")
        memory_table.add_column("Metric", style="cyan")
        memory_table.add_column("Value", style="green")

        initial_memory = process.memory_info().rss / 1024
        start_total_time = time.perf_counter()

        # Initialize agents and measure
performance num_agents = 10 # Reduced for testing for i in range(num_agents): start_time = time.perf_counter() Agent( agent_name=f"Financial-Analysis-Agent-{i}", agent_description="Personal finance advisor agent", max_loops=2, model_name="gpt-4o-mini", dynamic_temperature_enabled=True, interactive=False, ) init_time = (time.perf_counter() - start_time) * 1000 init_times.append(init_time) current_memory = process.memory_info().rss / 1024 memory_readings.append(current_memory - initial_memory) if (i + 1) % 5 == 0: console.print( f"Created {i + 1} agents...", style="bold blue" ) (time.perf_counter() - start_total_time) * 1000 # Calculate statistics time_stats = self._get_time_stats(init_times) memory_stats = self._get_memory_stats(memory_readings) # Verify basic statistics assert len(init_times) == num_agents assert len(memory_readings) == num_agents assert time_stats["mean"] > 0 assert memory_stats["mean"] >= 0 print("✓ Benchmark test passed") def _get_memory_stats(self, memory_readings): """Calculate memory statistics""" return { "peak": max(memory_readings) if memory_readings else 0, "min": min(memory_readings) if memory_readings else 0, "mean": mean(memory_readings) if memory_readings else 0, "median": ( median(memory_readings) if memory_readings else 0 ), "stdev": ( stdev(memory_readings) if len(memory_readings) > 1 else 0 ), "variance": ( variance(memory_readings) if len(memory_readings) > 1 else 0 ), } def _get_time_stats(self, times): """Calculate time statistics""" return { "total": sum(times), "mean": mean(times) if times else 0, "median": median(times) if times else 0, "min": min(times) if times else 0, "max": max(times) if times else 0, "stdev": stdev(times) if len(times) > 1 else 0, "variance": variance(times) if len(times) > 1 else 0, } # ============================================================================ # TOOL USAGE TESTS # ============================================================================ class TestAgentToolUsage: """Test comprehensive tool usage functionality for agents""" def test_normal_callable_tools(self): """Test normal callable tools (functions, lambdas, methods)""" print("\nTesting normal callable tools...") def math_tool(x: int, y: int) -> int: """Add two numbers together""" return x + y def string_tool(text: str) -> str: """Convert text to uppercase""" return text.upper() def list_tool(items: list) -> int: """Count items in a list""" return len(items) # Test with individual function tools agent = Agent( agent_name="Callable-Tools-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[math_tool, string_tool, list_tool], ) # Test tool addition assert len(agent.tools) == 3, "Tools not added correctly" # Test tool execution response = agent.run("Use the math tool to add 5 and 3") assert response is not None, "Tool execution failed" # Test lambda tools def lambda_tool(x): return x * 2 agent.add_tool(lambda_tool) assert ( len(agent.tools) == 4 ), "Lambda tool not added correctly" # Test method tools class MathOperations: def multiply(self, x: int, y: int) -> int: """Multiply two numbers""" return x * y math_ops = MathOperations() agent.add_tool(math_ops.multiply) assert ( len(agent.tools) == 5 ), "Method tool not added correctly" print("✓ Normal callable tools test passed") def test_tool_management_operations(self): """Test tool management operations (add, remove, list)""" print("\nTesting tool management operations...") def tool1(x: int) -> int: """Tool 1""" return x + 1 def tool2(x: int) -> int: """Tool 2""" return x * 2 def tool3(x: int) -> int: 
"""Tool 3""" return x - 1 agent = Agent( agent_name="Tool-Management-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[tool1, tool2], ) # Test initial tools assert ( len(agent.tools) == 2 ), "Initial tools not set correctly" # Test adding single tool agent.add_tool(tool3) assert len(agent.tools) == 3, "Single tool addition failed" # Test adding multiple tools def tool4(x: int) -> int: return x**2 def tool5(x: int) -> int: return x // 2 agent.add_tools([tool4, tool5]) assert len(agent.tools) == 5, "Multiple tools addition failed" # Test removing single tool agent.remove_tool(tool1) assert len(agent.tools) == 4, "Single tool removal failed" # Test removing multiple tools agent.remove_tools([tool2, tool3]) assert len(agent.tools) == 2, "Multiple tools removal failed" print("✓ Tool management operations test passed") def test_mcp_single_url_tools(self): """Test MCP single URL tools""" print("\nTesting MCP single URL tools...") # Mock MCP URL for testing mock_mcp_url = "http://localhost:8000/mcp" with patch( "swarms.structs.agent.get_mcp_tools_sync" ) as mock_get_tools: # Mock MCP tools response mock_tools = [ { "type": "function", "function": { "name": "mcp_calculator", "description": "Perform calculations", "parameters": { "type": "object", "properties": { "expression": { "type": "string", "description": "Math expression", } }, "required": ["expression"], }, }, }, { "type": "function", "function": { "name": "mcp_weather", "description": "Get weather information", "parameters": { "type": "object", "properties": { "location": { "type": "string", "description": "City name", } }, "required": ["location"], }, }, }, ] mock_get_tools.return_value = mock_tools agent = Agent( agent_name="MCP-Single-URL-Test-Agent", model_name="gpt-4o-mini", max_loops=1, mcp_url=mock_mcp_url, verbose=True, ) # Test MCP tools integration tools = agent.add_mcp_tools_to_memory() assert len(tools) == 2, "MCP tools not loaded correctly" assert ( mock_get_tools.called ), "MCP tools function not called" # Verify tool structure assert "mcp_calculator" in str( tools ), "Calculator tool not found" assert "mcp_weather" in str( tools ), "Weather tool not found" print("✓ MCP single URL tools test passed") def test_mcp_multiple_urls_tools(self): """Test MCP multiple URLs tools""" print("\nTesting MCP multiple URLs tools...") # Mock multiple MCP URLs for testing mock_mcp_urls = [ "http://localhost:8000/mcp1", "http://localhost:8000/mcp2", "http://localhost:8000/mcp3", ] with patch( "swarms.structs.agent.get_tools_for_multiple_mcp_servers" ) as mock_get_tools: # Mock MCP tools response from multiple servers mock_tools = [ { "type": "function", "function": { "name": "server1_tool", "description": "Tool from server 1", "parameters": { "type": "object", "properties": { "input": {"type": "string"} }, }, }, }, { "type": "function", "function": { "name": "server2_tool", "description": "Tool from server 2", "parameters": { "type": "object", "properties": { "data": {"type": "string"} }, }, }, }, { "type": "function", "function": { "name": "server3_tool", "description": "Tool from server 3", "parameters": { "type": "object", "properties": { "query": {"type": "string"} }, }, }, }, ] mock_get_tools.return_value = mock_tools agent = Agent( agent_name="MCP-Multiple-URLs-Test-Agent", model_name="gpt-4o-mini", max_loops=1, mcp_urls=mock_mcp_urls, verbose=True, ) # Test MCP tools integration from multiple servers tools = agent.add_mcp_tools_to_memory() assert ( len(tools) == 3 ), "MCP tools from multiple servers not loaded correctly" assert 
( mock_get_tools.called ), "MCP multiple tools function not called" # Verify tools from different servers tools_str = str(tools) assert ( "server1_tool" in tools_str ), "Server 1 tool not found" assert ( "server2_tool" in tools_str ), "Server 2 tool not found" assert ( "server3_tool" in tools_str ), "Server 3 tool not found" print("✓ MCP multiple URLs tools test passed") def test_base_tool_class_tools(self): """Test BaseTool class tools""" print("\nTesting BaseTool class tools...") from swarms.tools.base_tool import BaseTool def sample_function(x: int, y: int) -> int: """Sample function for testing""" return x + y # Create BaseTool instance base_tool = BaseTool( verbose=True, tools=[sample_function], tool_system_prompt="You are a helpful tool assistant", ) # Test tool schema generation schema = base_tool.func_to_dict(sample_function) assert isinstance( schema, dict ), "Tool schema not generated correctly" assert "name" in schema, "Tool name not in schema" assert ( "description" in schema ), "Tool description not in schema" assert "parameters" in schema, "Tool parameters not in schema" # Test tool execution test_input = {"x": 5, "y": 3} result = base_tool.execute_tool(test_input) assert result is not None, "Tool execution failed" print("✓ BaseTool class tools test passed") def test_tool_execution_and_error_handling(self): """Test tool execution and error handling""" print("\nTesting tool execution and error handling...") def valid_tool(x: int) -> int: """Valid tool that works correctly""" return x * 2 def error_tool(x: int) -> int: """Tool that raises an error""" raise ValueError("Test error") def type_error_tool(x: str) -> str: """Tool with type error""" return x.upper() agent = Agent( agent_name="Tool-Execution-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[valid_tool, error_tool, type_error_tool], ) # Test valid tool execution response = agent.run("Use the valid tool with input 5") assert response is not None, "Valid tool execution failed" # Test error handling try: agent.run("Use the error tool") # Should handle error gracefully except Exception: # Expected to handle errors gracefully pass print("✓ Tool execution and error handling test passed") def test_tool_schema_generation(self): """Test tool schema generation and validation""" print("\nTesting tool schema generation...") def complex_tool( name: str, age: int, email: str = None, is_active: bool = True, ) -> dict: """Complex tool with various parameter types""" return { "name": name, "age": age, "email": email, "is_active": is_active, } agent = Agent( agent_name="Tool-Schema-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[complex_tool], ) # Test that tools are properly registered assert len(agent.tools) == 1, "Tool not registered correctly" # Test tool execution with complex parameters response = agent.run( "Use the complex tool with name 'John', age 30, email 'john@example.com'" ) assert response is not None, "Complex tool execution failed" print("✓ Tool schema generation test passed") def test_aop_tools(self): """Test AOP (Agent Operations) tools""" print("\nTesting AOP tools...") from swarms.structs.aop import AOP # Create test agents agent1 = Agent( agent_name="AOP-Agent-1", model_name="gpt-4o-mini", max_loops=1, ) agent2 = Agent( agent_name="AOP-Agent-2", model_name="gpt-4o-mini", max_loops=1, ) # Create AOP instance aop = AOP( server_name="test-aop-server", verbose=True, ) # Test adding agents as tools tool_names = aop.add_agents_batch( agents=[agent1, agent2], tool_names=["math_agent", "text_agent"], 
tool_descriptions=[ "Performs mathematical operations", "Handles text processing", ], ) assert ( len(tool_names) == 2 ), "AOP agents not added as tools correctly" assert ( "math_agent" in tool_names ), "Math agent tool not created" assert ( "text_agent" in tool_names ), "Text agent tool not created" # Test tool discovery tools = aop.get_available_tools() assert len(tools) >= 2, "AOP tools not discovered correctly" print("✓ AOP tools test passed") def test_tool_choice_and_execution_modes(self): """Test different tool choice and execution modes""" print("\nTesting tool choice and execution modes...") def tool_a(x: int) -> int: """Tool A""" return x + 1 def tool_b(x: int) -> int: """Tool B""" return x * 2 # Test with auto tool choice agent_auto = Agent( agent_name="Auto-Tool-Choice-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[tool_a, tool_b], tool_choice="auto", ) response_auto = agent_auto.run( "Calculate something using the available tools" ) assert response_auto is not None, "Auto tool choice failed" # Test with specific tool choice agent_specific = Agent( agent_name="Specific-Tool-Choice-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[tool_a, tool_b], tool_choice="tool_a", ) response_specific = agent_specific.run( "Use tool_a with input 5" ) assert ( response_specific is not None ), "Specific tool choice failed" # Test with tool execution enabled/disabled agent_execute = Agent( agent_name="Tool-Execute-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[tool_a, tool_b], execute_tool=True, ) response_execute = agent_execute.run("Execute a tool") assert ( response_execute is not None ), "Tool execution mode failed" print("✓ Tool choice and execution modes test passed") def test_tool_system_prompts(self): """Test tool system prompts and custom tool prompts""" print("\nTesting tool system prompts...") def calculator_tool(expression: str) -> str: """Calculate mathematical expressions""" try: result = eval(expression) return str(result) except Exception: return "Invalid expression" custom_tool_prompt = "You have access to a calculator tool. Use it for mathematical calculations." 
agent = Agent( agent_name="Tool-Prompt-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[calculator_tool], tool_system_prompt=custom_tool_prompt, ) # Test that custom tool prompt is set assert ( agent.tool_system_prompt == custom_tool_prompt ), "Custom tool prompt not set" # Test tool execution with custom prompt response = agent.run("Calculate 2 + 2 * 3") assert ( response is not None ), "Tool execution with custom prompt failed" print("✓ Tool system prompts test passed") def test_tool_parallel_execution(self): """Test parallel tool execution capabilities""" print("\nTesting parallel tool execution...") def slow_tool(x: int) -> int: """Slow tool that takes time""" import time time.sleep(0.1) # Simulate slow operation return x * 2 def fast_tool(x: int) -> int: """Fast tool""" return x + 1 agent = Agent( agent_name="Parallel-Tool-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[slow_tool, fast_tool], ) # Test parallel tool execution start_time = time.time() response = agent.run("Use both tools with input 5") end_time = time.time() assert response is not None, "Parallel tool execution failed" # Should be faster than sequential execution assert ( end_time - start_time ) < 0.5, "Parallel execution took too long" print("✓ Parallel tool execution test passed") def test_tool_validation_and_type_checking(self): """Test tool validation and type checking""" print("\nTesting tool validation and type checking...") def typed_tool(x: int, y: str, z: bool = False) -> dict: """Tool with specific type hints""" return {"x": x, "y": y, "z": z, "result": f"{x} {y} {z}"} agent = Agent( agent_name="Tool-Validation-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[typed_tool], ) # Test tool execution with correct types response = agent.run( "Use typed_tool with x=5, y='hello', z=True" ) assert response is not None, "Typed tool execution failed" # Test tool execution with incorrect types (should handle gracefully) try: agent.run("Use typed_tool with incorrect types") except Exception: # Expected to handle type errors gracefully pass print("✓ Tool validation and type checking test passed") def test_tool_caching_and_performance(self): """Test tool caching and performance optimization""" print("\nTesting tool caching and performance...") call_count = 0 def cached_tool(x: int) -> int: """Tool that should be cached""" nonlocal call_count call_count += 1 return x**2 agent = Agent( agent_name="Tool-Caching-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[cached_tool], ) # Test multiple calls to the same tool agent.run("Use cached_tool with input 5") agent.run("Use cached_tool with input 5 again") # Verify tool was called (caching behavior may vary) assert call_count >= 1, "Tool not called at least once" print("✓ Tool caching and performance test passed") def test_tool_error_recovery(self): """Test tool error recovery and fallback mechanisms""" print("\nTesting tool error recovery...") def unreliable_tool(x: int) -> int: """Tool that sometimes fails""" import random if random.random() < 0.5: raise Exception("Random failure") return x * 2 def fallback_tool(x: int) -> int: """Fallback tool""" return x + 10 agent = Agent( agent_name="Tool-Recovery-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[unreliable_tool, fallback_tool], retry_attempts=3, ) # Test error recovery response = agent.run("Use unreliable_tool with input 5") assert response is not None, "Tool error recovery failed" print("✓ Tool error recovery test passed") def test_tool_with_different_output_types(self): 
"""Test tools with different output types""" print("\nTesting tools with different output types...") def json_tool(data: dict) -> str: """Tool that returns JSON string""" import json return json.dumps(data) def yaml_tool(data: dict) -> str: """Tool that returns YAML string""" import yaml return yaml.dump(data) def dict_tool(x: int) -> dict: """Tool that returns dictionary""" return {"value": x, "squared": x**2} agent = Agent( agent_name="Output-Types-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[json_tool, yaml_tool, dict_tool], ) # Test JSON tool response = agent.run( "Use json_tool with data {'name': 'test', 'value': 123}" ) assert response is not None, "JSON tool execution failed" # Test YAML tool response = agent.run( "Use yaml_tool with data {'key': 'value'}" ) assert response is not None, "YAML tool execution failed" # Test dict tool response = agent.run("Use dict_tool with input 5") assert response is not None, "Dict tool execution failed" print("✓ Tools with different output types test passed") def test_tool_with_async_execution(self): """Test tools with async execution""" print("\nTesting tools with async execution...") async def async_tool(x: int) -> int: """Async tool that performs async operation""" import asyncio await asyncio.sleep(0.01) # Simulate async operation return x * 2 def sync_tool(x: int) -> int: """Sync tool""" return x + 1 agent = Agent( agent_name="Async-Tool-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[ sync_tool ], # Note: async tools need special handling ) # Test sync tool execution response = agent.run("Use sync_tool with input 5") assert response is not None, "Sync tool execution failed" print("✓ Tools with async execution test passed") def test_tool_with_file_operations(self): """Test tools that perform file operations""" print("\nTesting tools with file operations...") import os import tempfile def file_writer_tool(filename: str, content: str) -> str: """Tool that writes content to a file""" with open(filename, "w") as f: f.write(content) return f"Written {len(content)} characters to {filename}" def file_reader_tool(filename: str) -> str: """Tool that reads content from a file""" try: with open(filename, "r") as f: return f.read() except FileNotFoundError: return "File not found" with tempfile.TemporaryDirectory() as temp_dir: test_file = os.path.join(temp_dir, "test.txt") agent = Agent( agent_name="File-Ops-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[file_writer_tool, file_reader_tool], ) # Test file writing response = agent.run( f"Use file_writer_tool to write 'Hello World' to {test_file}" ) assert ( response is not None ), "File writing tool execution failed" # Test file reading response = agent.run( f"Use file_reader_tool to read from {test_file}" ) assert ( response is not None ), "File reading tool execution failed" print("✓ Tools with file operations test passed") def test_tool_with_network_operations(self): """Test tools that perform network operations""" print("\nTesting tools with network operations...") def url_tool(url: str) -> str: """Tool that processes URLs""" return f"Processing URL: {url}" def api_tool(endpoint: str, method: str = "GET") -> str: """Tool that simulates API calls""" return f"API {method} request to {endpoint}" agent = Agent( agent_name="Network-Ops-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[url_tool, api_tool], ) # Test URL tool response = agent.run( "Use url_tool with 'https://example.com'" ) assert response is not None, "URL tool execution failed" # Test API tool 
response = agent.run( "Use api_tool with endpoint '/api/data' and method 'POST'" ) assert response is not None, "API tool execution failed" print("✓ Tools with network operations test passed") def test_tool_with_database_operations(self): """Test tools that perform database operations""" print("\nTesting tools with database operations...") def db_query_tool(query: str) -> str: """Tool that simulates database queries""" return f"Executed query: {query}" def db_insert_tool(table: str, data: dict) -> str: """Tool that simulates database inserts""" return f"Inserted data into {table}: {data}" agent = Agent( agent_name="Database-Ops-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[db_query_tool, db_insert_tool], ) # Test database query response = agent.run( "Use db_query_tool with 'SELECT * FROM users'" ) assert ( response is not None ), "Database query tool execution failed" # Test database insert response = agent.run( "Use db_insert_tool with table 'users' and data {'name': 'John'}" ) assert ( response is not None ), "Database insert tool execution failed" print("✓ Tools with database operations test passed") def test_tool_with_machine_learning_operations(self): """Test tools that perform ML operations""" print("\nTesting tools with ML operations...") def predict_tool(features: list) -> str: """Tool that simulates ML predictions""" return f"Prediction for features {features}: 0.85" def train_tool(model_name: str, data_size: int) -> str: """Tool that simulates model training""" return f"Trained {model_name} with {data_size} samples" agent = Agent( agent_name="ML-Ops-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[predict_tool, train_tool], ) # Test ML prediction response = agent.run( "Use predict_tool with features [1, 2, 3, 4]" ) assert ( response is not None ), "ML prediction tool execution failed" # Test ML training response = agent.run( "Use train_tool with model 'random_forest' and data_size 1000" ) assert ( response is not None ), "ML training tool execution failed" print("✓ Tools with ML operations test passed") def test_tool_with_image_processing(self): """Test tools that perform image processing""" print("\nTesting tools with image processing...") def resize_tool( image_path: str, width: int, height: int ) -> str: """Tool that simulates image resizing""" return f"Resized {image_path} to {width}x{height}" def filter_tool(image_path: str, filter_type: str) -> str: """Tool that simulates image filtering""" return f"Applied {filter_type} filter to {image_path}" agent = Agent( agent_name="Image-Processing-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[resize_tool, filter_tool], ) # Test image resizing response = agent.run( "Use resize_tool with image 'test.jpg', width 800, height 600" ) assert ( response is not None ), "Image resize tool execution failed" # Test image filtering response = agent.run( "Use filter_tool with image 'test.jpg' and filter 'blur'" ) assert ( response is not None ), "Image filter tool execution failed" print("✓ Tools with image processing test passed") def test_tool_with_text_processing(self): """Test tools that perform text processing""" print("\nTesting tools with text processing...") def tokenize_tool(text: str) -> list: """Tool that tokenizes text""" return text.split() def translate_tool(text: str, target_lang: str) -> str: """Tool that simulates translation""" return f"Translated '{text}' to {target_lang}" def sentiment_tool(text: str) -> str: """Tool that simulates sentiment analysis""" return f"Sentiment of '{text}': 
positive" agent = Agent( agent_name="Text-Processing-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[tokenize_tool, translate_tool, sentiment_tool], ) # Test text tokenization response = agent.run( "Use tokenize_tool with 'Hello world this is a test'" ) assert ( response is not None ), "Text tokenization tool execution failed" # Test translation response = agent.run( "Use translate_tool with 'Hello' and target_lang 'Spanish'" ) assert ( response is not None ), "Translation tool execution failed" # Test sentiment analysis response = agent.run( "Use sentiment_tool with 'I love this product!'" ) assert ( response is not None ), "Sentiment analysis tool execution failed" print("✓ Tools with text processing test passed") def test_tool_with_mathematical_operations(self): """Test tools that perform mathematical operations""" print("\nTesting tools with mathematical operations...") def matrix_multiply_tool( matrix_a: list, matrix_b: list ) -> list: """Tool that multiplies matrices""" # Simple 2x2 matrix multiplication result = [[0, 0], [0, 0]] for i in range(2): for j in range(2): for k in range(2): result[i][j] += ( matrix_a[i][k] * matrix_b[k][j] ) return result def statistics_tool(data: list) -> dict: """Tool that calculates statistics""" return { "mean": sum(data) / len(data), "max": max(data), "min": min(data), "count": len(data), } def calculus_tool(function: str, x: float) -> str: """Tool that simulates calculus operations""" return f"Derivative of {function} at x={x}: 2*x" agent = Agent( agent_name="Math-Ops-Test-Agent", model_name="gpt-4o-mini", max_loops=1, tools=[ matrix_multiply_tool, statistics_tool, calculus_tool, ], ) # Test matrix multiplication response = agent.run( "Use matrix_multiply_tool with [[1,2],[3,4]] and [[5,6],[7,8]]" ) assert ( response is not None ), "Matrix multiplication tool execution failed" # Test statistics response = agent.run( "Use statistics_tool with [1, 2, 3, 4, 5]" ) assert ( response is not None ), "Statistics tool execution failed" # Test calculus response = agent.run("Use calculus_tool with 'x^2' and x=3") assert response is not None, "Calculus tool execution failed" print("✓ Tools with mathematical operations test passed") # ============================================================================ # LLM ARGS AND HANDLING TESTS # ============================================================================ class TestLLMArgsAndHandling: """Test LLM arguments and handling functionality""" def test_combined_llm_args(self): """Test that llm_args, tools_list_dictionary, and MCP tools can be combined.""" print("\nTesting combined LLM args...") # Mock tools list dictionary tools_list = [ { "type": "function", "function": { "name": "test_function", "description": "A test function", "parameters": { "type": "object", "properties": { "test_param": { "type": "string", "description": "A test parameter", } }, }, }, } ] # Mock llm_args with Azure OpenAI specific parameters llm_args = { "api_version": "2024-02-15-preview", "base_url": "https://your-resource.openai.azure.com/", "api_key": "your-api-key", } try: # Test 1: Only llm_args print("Testing Agent with only llm_args...") Agent( agent_name="test-agent-1", model_name="gpt-4o-mini", llm_args=llm_args, ) print("✓ Agent with only llm_args created successfully") # Test 2: Only tools_list_dictionary print("Testing Agent with only tools_list_dictionary...") Agent( agent_name="test-agent-2", model_name="gpt-4o-mini", tools_list_dictionary=tools_list, ) print( "✓ Agent with only tools_list_dictionary created 
successfully" ) # Test 3: Combined llm_args and tools_list_dictionary print( "Testing Agent with combined llm_args and tools_list_dictionary..." ) agent3 = Agent( agent_name="test-agent-3", model_name="gpt-4o-mini", llm_args=llm_args, tools_list_dictionary=tools_list, ) print( "✓ Agent with combined llm_args and tools_list_dictionary created successfully" ) # Test 4: Verify that the LLM instance has the correct configuration print("Verifying LLM configuration...") # Check that agent3 has both llm_args and tools configured assert ( agent3.llm_args == llm_args ), "llm_args not preserved" assert ( agent3.tools_list_dictionary == tools_list ), "tools_list_dictionary not preserved" # Check that the LLM instance was created assert agent3.llm is not None, "LLM instance not created" print("✓ LLM configuration verified successfully") print("✓ Combined LLM args test passed") except Exception as e: print(f"✗ Combined LLM args test failed: {e}") raise def test_azure_openai_example(self): """Test the Azure OpenAI example with api_version parameter.""" print("\nTesting Azure OpenAI example with api_version...") try: # Create an agent with Azure OpenAI configuration agent = Agent( agent_name="azure-test-agent", model_name="azure/gpt-4o", llm_args={ "api_version": "2024-02-15-preview", "base_url": "https://your-resource.openai.azure.com/", "api_key": "your-api-key", }, tools_list_dictionary=[ { "type": "function", "function": { "name": "get_weather", "description": "Get weather information", "parameters": { "type": "object", "properties": { "location": { "type": "string", "description": "The city and state", } }, }, }, } ], ) print( "✓ Azure OpenAI agent with combined parameters created successfully" ) # Verify configuration assert agent.llm_args is not None, "llm_args not set" assert ( "api_version" in agent.llm_args ), "api_version not in llm_args" assert ( agent.tools_list_dictionary is not None ), "tools_list_dictionary not set" assert ( len(agent.tools_list_dictionary) > 0 ), "tools_list_dictionary is empty" print("✓ Azure OpenAI configuration verified") print("✓ Azure OpenAI example test passed") except Exception as e: print(f"✗ Azure OpenAI test failed: {e}") raise def test_llm_handling_args_kwargs(self): """Test that llm_handling properly handles both args and kwargs.""" print("\nTesting LLM handling args and kwargs...") # Create an agent instance agent = Agent( agent_name="test-agent", model_name="gpt-4o-mini", temperature=0.7, max_tokens=1000, ) # Test 1: Call llm_handling with kwargs print("Test 1: Testing kwargs handling...") try: # This should work and add the kwargs to additional_args agent.llm_handling(top_p=0.9, frequency_penalty=0.1) print("✓ kwargs handling works") except Exception as e: print(f"✗ kwargs handling failed: {e}") raise # Test 2: Call llm_handling with args (dictionary) print("Test 2: Testing args handling with dictionary...") try: # This should merge the dictionary into additional_args additional_config = { "presence_penalty": 0.2, "logit_bias": {"123": 1}, } agent.llm_handling(additional_config) print("✓ args handling with dictionary works") except Exception as e: print(f"✗ args handling with dictionary failed: {e}") raise # Test 3: Call llm_handling with both args and kwargs print("Test 3: Testing both args and kwargs...") try: # This should handle both additional_config = {"presence_penalty": 0.3} agent.llm_handling( additional_config, top_p=0.8, frequency_penalty=0.2 ) print("✓ combined args and kwargs handling works") except Exception as e: print(f"✗ combined args and 
kwargs handling failed: {e}")
            raise

        # Test 4: Call llm_handling with non-dictionary args
        print("Test 4: Testing non-dictionary args...")
        try:
            # This should store args under 'additional_args' key
            agent.llm_handling(
                "some_string", 123, ["list", "of", "items"]
            )
            print("✓ non-dictionary args handling works")
        except Exception as e:
            print(f"✗ non-dictionary args handling failed: {e}")
            raise

        print("✓ LLM handling args and kwargs test passed")


# ============================================================================
# MAIN TEST RUNNER
# ============================================================================


def run_all_tests():
    """Run all test functions"""
    print("Starting Merged Agent Test Suite...\n")

    # Test classes to run
    test_classes = [
        TestBasicAgent,
        TestAgentFeatures,
        TestAgentLogging,
        TestCreateAgentsFromYaml,
        TestAgentBenchmark,
        TestAgentToolUsage,
        TestLLMArgsAndHandling,
    ]

    total_tests = 0
    passed_tests = 0
    failed_tests = 0

    for test_class in test_classes:
        print(f"\n{'='*50}")
        print(f"Running {test_class.__name__}")
        print(f"{'='*50}")

        # Create test instance
        test_instance = test_class()

        # This standalone runner bypasses pytest, so unittest-style setUp
        # (used by TestAgentLogging and TestCreateAgentsFromYaml) must be
        # invoked explicitly; pytest fixtures are still unavailable here.
        if hasattr(test_instance, "setUp"):
            test_instance.setUp()

        # Get all test methods
        test_methods = [
            method
            for method in dir(test_instance)
            if method.startswith("test_")
        ]

        for test_method in test_methods:
            total_tests += 1
            try:
                # Run the test method; await coroutine tests such as
                # test_async_operations instead of leaving them unawaited
                test_callable = getattr(test_instance, test_method)
                if asyncio.iscoroutinefunction(test_callable):
                    asyncio.run(test_callable())
                else:
                    test_callable()
                passed_tests += 1
                print(f"✓ {test_method}")
            except Exception as e:
                failed_tests += 1
                print(f"✗ {test_method}: {str(e)}")

    # Print summary
    print(f"\n{'='*50}")
    print("Test Summary")
    print(f"{'='*50}")
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {failed_tests}")
    success_rate = (
        (passed_tests / total_tests) * 100 if total_tests else 0.0
    )
    print(f"Success Rate: {success_rate:.2f}%")

    return {
        "total": total_tests,
        "passed": passed_tests,
        "failed": failed_tests,
        "success_rate": success_rate,
    }


if __name__ == "__main__":
    # Run all tests
    results = run_all_tests()
    print(results)
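
# ----------------------------------------------------------------------------
# Usage notes (assumptions, not enforced by this module):
# - Under pytest, TestAgentFeatures.test_async_operations is an async test and
#   needs an async-test plugin such as pytest-asyncio (e.g. installed with
#   `pip install pytest-asyncio` and run with asyncio_mode set to "auto").
# - Many TestAgentFeatures / TestAgentToolUsage tests call live models
#   ("gpt-4.1", "gpt-4o-mini"), so OPENAI_API_KEY must be present in the
#   environment loaded by load_dotenv() at the top of this file.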