You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
517 lines
20 KiB
517 lines
20 KiB
import json
import os
import sys
import time
from datetime import datetime
from typing import Any, Callable, Dict, List

import requests
from dotenv import load_dotenv

# Basic Imports for Swarms
from swarms.structs import (
    Agent,
    SequentialWorkflow,
    ConcurrentWorkflow,
    AgentRearrange,
    MixtureOfAgents,
    SpreadSheetSwarm,
    GroupChat,
    MultiAgentRouter,
    MajorityVoting,
    SwarmRouter,
    RoundRobinSwarm,
    InteractiveGroupChat
)
# Import swarms not in __init__.py directly
from swarms.structs.hiearchical_swarm import HierarchicalSwarm
from swarms.structs.tree_swarm import ForestSwarm
from swarms.tools.base_tool import BaseTool

# Setup Logging
from loguru import logger
|
|
logger.add("test_runs/test_failures.log", rotation="10 MB", level="ERROR")
|
|
|
|
# Load environment variables
|
|
load_dotenv()
|
|
|
|
# --- Constants and Configuration ---
|
|
API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
|
# GitHub Issue Creation (commented out for later use)
|
|
# GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
|
|
# GITHUB_REPO_OWNER = os.getenv("GITHUB_REPO_OWNER", "kyegomez")
|
|
# GITHUB_REPO_NAME = os.getenv("GITHUB_REPO_NAME", "swarms")
|
|
# BASE_URL = "https://api.github.com"
|
|
# GITHUB_HEADERS = {
|
|
# "Authorization": f"token {GITHUB_TOKEN}",
|
|
# "Accept": "application/vnd.github.v3+json",
|
|
# }
|
|
|
|
# --- Helper Functions ---
|
|
|
|
def generate_timestamp() -> str:
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``."""
    now = datetime.now()
    return now.strftime("%Y%m%d_%H%M%S")
|
|
|
|
def write_markdown_report(results: List[Dict[str, Any]], filename: str):
    """Write test results to a markdown report under ``test_runs/``.

    Args:
        results: Result dicts, each with at least ``test_name`` and
            ``status`` keys, and optionally ``response`` and ``error``.
        filename: Base name (without extension) for the report file.
            Previously this parameter was ignored and a hard-coded path
            was used, so every run clobbered the same file — fixed here.
    """
    # Idempotent directory creation (race-safe vs. an exists() check).
    os.makedirs("test_runs", exist_ok=True)

    with open(f"test_runs/{filename}.md", "w") as f:
        f.write("# Swarms Comprehensive Test Report\n\n")
        f.write(f"Test Run: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        total = len(results)
        passed = sum(1 for r in results if r["status"] == "passed")
        failed = total - passed
        # Guard against ZeroDivisionError when no tests were run.
        success_rate = (passed / total) * 100 if total else 0.0

        f.write("## Summary\n\n")
        f.write(f"- **Total Tests:** {total}\n")
        f.write(f"- **Passed:** {passed}\n")
        f.write(f"- **Failed:** {failed}\n")
        f.write(f"- **Success Rate:** {success_rate:.2f}%\n\n")

        f.write("## Detailed Results\n\n")
        for result in results:
            f.write(f"### {result['test_name']}\n\n")
            f.write(f"**Status:** {result['status'].upper()}\n\n")
            if result.get("response"):
                f.write("Response:\n```json\n")
                response_str = result["response"]
                try:
                    # Pretty-print JSON strings/objects; fall back to str() for
                    # anything that is not valid JSON.
                    response_json = json.loads(response_str) if isinstance(response_str, str) else response_str
                    f.write(json.dumps(response_json, indent=2))
                except (json.JSONDecodeError, TypeError):
                    f.write(str(response_str))
                f.write("\n```\n\n")

            if result.get("error"):
                f.write(f"**Error:**\n```\n{result['error']}\n```\n\n")
            f.write("---\n\n")
|
|
|
|
# def create_github_issue(test_result: Dict[str, Any]) -> Dict[str, Any]:
|
|
# """Create a GitHub issue for a failed test"""
|
|
# if not all([GITHUB_TOKEN, GITHUB_REPO_OWNER, GITHUB_REPO_NAME]):
|
|
# logger.warning("GitHub credentials not configured. Skipping issue creation.")
|
|
# return None
|
|
|
|
# if test_result["status"] != "failed":
|
|
# return None
|
|
|
|
# issue_title = f"Automated Test Failure: {test_result['test_name']}"
|
|
|
|
# issue_body = f"""
|
|
# ## Test Failure Report
|
|
|
|
# - **Test Name**: `{test_result['test_name']}`
|
|
# - **Timestamp**: `{datetime.now().isoformat()}`
|
|
# - **Status**: {test_result['status']}
|
|
|
|
# ### Error Information
|
|
# ```
|
|
# {test_result.get('error', 'No error message available')}
|
|
# ```
|
|
|
|
# ### Response (if available)
|
|
# ```json
|
|
# {json.dumps(test_result.get('response', {}), indent=2)}
|
|
# ```
|
|
|
|
# ---
|
|
# *This issue was automatically generated by the Swarms testing workflow.*
|
|
# """
|
|
|
|
# payload = {
|
|
# "title": issue_title,
|
|
# "body": issue_body,
|
|
# "labels": ["bug", "test-failure", "automated-report"],
|
|
# }
|
|
|
|
# try:
|
|
# response = requests.post(
|
|
# f"{BASE_URL}/repos/{GITHUB_REPO_OWNER}/{GITHUB_REPO_NAME}/issues",
|
|
# headers=GITHUB_HEADERS,
|
|
# json=payload,
|
|
# )
|
|
# response.raise_for_status()
|
|
# logger.info(f"Created GitHub issue for {test_result['test_name']}")
|
|
# return response.json()
|
|
# except requests.exceptions.RequestException as e:
|
|
# logger.error(f"Failed to create GitHub issue: {e.response.text if e.response else str(e)}")
|
|
# return None
|
|
|
|
def create_test_agent(name: str, system_prompt: str = None, tools: List[Callable] = None, **kwargs) -> Agent:
    """Build a test Agent with sensible defaults and error logging.

    Args:
        name: Agent name; also used to derive a default system prompt.
        system_prompt: Optional custom prompt; defaults to a generic one.
        tools: Optional callables exposed to the agent as tools.
        **kwargs: Extra keyword arguments forwarded to the Agent constructor.

    Raises:
        Exception: Re-raised after logging if Agent construction fails.
    """
    prompt = system_prompt or f"You are {name}, a helpful AI assistant."
    try:
        return Agent(
            agent_name=name,
            system_prompt=prompt,
            model_name="gpt-4o",  # model used for all test agents
            max_loops=1,
            max_tokens=200,
            tools=tools,
            **kwargs,
        )
    except Exception as e:
        logger.error(f"Failed to create agent {name}: {e}")
        raise
|
|
|
|
# --- Basic Agent Tests ---
|
|
|
|
def test_basic_agent_functionality():
    """Test basic agent creation and execution"""
    basic_agent = create_test_agent("BasicAgent")
    reply = basic_agent.run("Say hello and explain what you are.")

    # A non-empty string response means the agent round-trip worked.
    assert isinstance(reply, str)
    assert len(reply) > 0
    return {
        "test_name": "test_basic_agent_functionality",
        "status": "passed",
        "response": "Agent created and responded successfully",
    }
|
|
|
|
def test_agent_with_custom_prompt():
    """Test agent with custom system prompt"""
    prompt = "You are a mathematician who only responds with numbers and mathematical expressions."
    math_agent = create_test_agent("MathAgent", system_prompt=prompt)
    answer = math_agent.run("What is 2+2?")

    assert isinstance(answer, str)
    assert len(answer) > 0
    # Truncate the response so the report stays compact.
    return {
        "test_name": "test_agent_with_custom_prompt",
        "status": "passed",
        "response": answer[:100],
    }
|
|
|
|
def test_tool_execution_with_agent():
    """Test agent's ability to use tools"""

    def simple_calculator(a: int, b: int) -> int:
        """Add two numbers"""
        return a + b

    calc_agent = create_test_agent("CalculatorAgent", tools=[simple_calculator])
    outcome = calc_agent.run("Use the calculator to add 5 and 7.")

    assert isinstance(outcome, str)
    assert len(outcome) > 0
    return {
        "test_name": "test_tool_execution_with_agent",
        "status": "passed",
        "response": "Tool execution completed",
    }
|
|
|
|
# # --- Multi-Modal Tests ---
|
|
|
|
def test_multimodal_execution():
    """Test agent's ability to process images"""
    vision_agent = create_test_agent("VisionAgent", multi_modal=True)
    description = vision_agent.run("Describe this image.", img="tests/test_data/image1.jpg")

    assert isinstance(description, str)
    assert len(description) > 0
    return {
        "test_name": "test_multimodal_execution",
        "status": "passed",
        "response": "Multimodal response received",
    }
|
|
|
|
def test_multiple_image_execution():
    """Test agent's ability to process multiple images"""
    multi_vision_agent = create_test_agent("MultiVisionAgent", multi_modal=True)
    descriptions = multi_vision_agent.run_multiple_images(
        "Describe these images.",
        imgs=["tests/test_data/image1.jpg", "tests/test_data/image2.png"],
    )

    # One response entry per image is expected; require at least one.
    assert isinstance(descriptions, list)
    assert len(descriptions) >= 1
    return {
        "test_name": "test_multiple_image_execution",
        "status": "passed",
        "response": "Multiple image responses received",
    }
|
|
|
|
# --- Workflow Tests ---
|
|
|
|
def test_sequential_workflow():
    """Test SequentialWorkflow with multiple agents"""
    pipeline = SequentialWorkflow(
        agents=[
            create_test_agent("QuoteAgent", "Generate a famous quote."),
            create_test_agent("ExplainerAgent", "Explain the meaning of the provided text."),
        ],
        max_loops=1,
    )

    try:
        messages = pipeline.run("Start by generating a quote, then explain it.")
        logger.info(f"SequentialWorkflow response type: {type(messages)}, length: {len(messages) if hasattr(messages, '__len__') else 'N/A'}")

        # The workflow yields the conversation as a list of messages.
        assert messages is not None
        assert isinstance(messages, list) and len(messages) > 0
        return {"test_name": "test_sequential_workflow", "status": "passed", "response": "Sequential workflow completed"}
    except Exception as e:
        logger.error(f"SequentialWorkflow test failed with exception: {e}")
        return {"test_name": "test_sequential_workflow", "status": "failed", "error": str(e)}
|
|
|
|
def test_concurrent_workflow():
    """Test ConcurrentWorkflow with multiple agents"""
    fanout = ConcurrentWorkflow(
        agents=[create_test_agent("Agent1"), create_test_agent("Agent2")],
        max_loops=1,
    )

    try:
        messages = fanout.run("What are two different famous quotes?")
        logger.info(f"ConcurrentWorkflow response type: {type(messages)}, length: {len(messages) if hasattr(messages, '__len__') else 'N/A'}")

        # ConcurrentWorkflow returns a list of conversation messages
        assert messages is not None
        assert isinstance(messages, list) and len(messages) > 0
        return {"test_name": "test_concurrent_workflow", "status": "passed", "response": "Concurrent workflow completed"}
    except Exception as e:
        logger.error(f"ConcurrentWorkflow test failed with exception: {e}")
        return {"test_name": "test_concurrent_workflow", "status": "failed", "error": str(e)}
|
|
|
|
# --- Advanced Swarm Tests ---
|
|
|
|
def test_agent_rearrange():
    """Test AgentRearrange dynamic workflow"""
    team = [
        create_test_agent("Researcher", "You are a researcher who gathers information."),
        create_test_agent("Analyst", "You are an analyst who analyzes information."),
        create_test_agent("Writer", "You are a writer who creates final reports."),
    ]

    rearrange_swarm = AgentRearrange(agents=team, flow="Researcher -> Analyst -> Writer", max_loops=1)
    result = rearrange_swarm.run("Research the benefits of renewable energy, analyze the findings, and write a brief summary.")

    # Accept either a string or a dict, depending on the configured output type.
    assert result is not None
    assert isinstance(result, (str, dict))
    return {"test_name": "test_agent_rearrange", "status": "passed", "response": "AgentRearrange completed"}
|
|
|
|
def test_mixture_of_agents():
    """Test MixtureOfAgents collaboration"""
    experts = [
        create_test_agent("TechExpert", "You are a technology expert."),
        create_test_agent("BusinessAnalyst", "You are a business analyst."),
        create_test_agent("Strategist", "You are a strategic planner."),
    ]

    moa_swarm = MixtureOfAgents(agents=experts, max_loops=1)
    analysis = moa_swarm.run("Analyze the impact of AI on modern businesses.")

    assert analysis is not None
    return {"test_name": "test_mixture_of_agents", "status": "passed", "response": "MixtureOfAgents completed"}
|
|
|
|
def test_spreadsheet_swarm():
    """Test SpreadSheetSwarm for data processing"""
    processors = [create_test_agent(f"DataProcessor{i}") for i in (1, 2)]

    sheet_swarm = SpreadSheetSwarm(agents=processors, max_loops=1, autosave_on=False)

    # SpreadSheetSwarm uses run() method, not run_agents()
    output = sheet_swarm.run("Calculate 10*5 and 20+15")

    assert output is not None
    return {"test_name": "test_spreadsheet_swarm", "status": "passed", "response": "SpreadSheetSwarm completed"}
|
|
|
|
def test_hierarchical_swarm():
    """Test HierarchicalSwarm structure"""
    # The director is told explicitly which worker names exist so it does
    # not invent agents that the swarm cannot dispatch to.
    director = create_test_agent(
        "Director",
        "You are a director who delegates tasks. When creating orders, you must only assign tasks to the following available agents: Worker1, Worker2. Do not create new agent names.",
    )

    workers = [
        create_test_agent("Worker1", "You are Worker1 who follows instructions and can handle any assigned task."),
        create_test_agent("Worker2", "You are Worker2 who follows instructions and can handle any assigned task."),
    ]

    # HierarchicalSwarm constructor expects 'director' and 'agents' parameters
    hierarchy = HierarchicalSwarm(director=director, agents=workers, max_loops=1)

    outcome = hierarchy.run("Create a simple plan for organizing a team meeting. Use only the available agents: Worker1 and Worker2.")

    assert outcome is not None
    return {"test_name": "test_hierarchical_swarm", "status": "passed", "response": "HierarchicalSwarm completed"}
|
|
|
|
def test_majority_voting():
    """Test MajorityVoting consensus mechanism"""
    judges = [
        create_test_agent(f"Judge{i}", "You are a judge who evaluates options.")
        for i in (1, 2, 3)
    ]

    voting_swarm = MajorityVoting(agents=judges)
    verdict = voting_swarm.run("Should we invest in renewable energy? Answer with YES or NO and brief reasoning.")

    assert verdict is not None
    return {"test_name": "test_majority_voting", "status": "passed", "response": "MajorityVoting completed"}
|
|
|
|
def test_round_robin_swarm():
    """Test RoundRobinSwarm task distribution"""
    participants = [create_test_agent(f"Agent{i}") for i in (1, 2, 3)]

    rr_swarm = RoundRobinSwarm(agents=participants)
    task_list = ["Task 1: Count to 5", "Task 2: Name 3 colors", "Task 3: List 2 animals"]

    result = rr_swarm.run(task_list)

    assert result is not None
    return {"test_name": "test_round_robin_swarm", "status": "passed", "response": "RoundRobinSwarm completed"}
|
|
|
|
def test_swarm_router():
    """Test SwarmRouter dynamic routing"""
    specialists = [
        create_test_agent("AnalysisAgent", "You specialize in data analysis."),
        create_test_agent("WritingAgent", "You specialize in writing and communication."),
    ]

    test_router = SwarmRouter(
        name="TestRouter",
        description="Routes tasks to appropriate agents",
        agents=specialists,
        swarm_type="SequentialWorkflow",
    )

    routed = test_router.run("Analyze some data and write a brief report.")

    assert routed is not None
    return {"test_name": "test_swarm_router", "status": "passed", "response": "SwarmRouter completed"}
|
|
|
|
# --- Streaming and Performance Tests ---
|
|
|
|
def test_streaming_mode():
    """Test streaming response generation"""
    streaming_agent = create_test_agent("StreamingAgent", streaming_on=True)
    output = streaming_agent.run("Tell me a short story about technology.")

    # Streaming may yield a generator or a fully assembled string, so only
    # require that something came back.
    assert output is not None
    return {"test_name": "test_streaming_mode", "status": "passed", "response": "Streaming mode tested"}
|
|
|
|
def test_agent_memory_persistence():
    """Test agent memory functionality"""
    memory_agent = create_test_agent("MemoryAgent", return_history=True)

    # First interaction
    first = memory_agent.run("My name is Alice. Remember this.")
    # Second interaction
    second = memory_agent.run("What is my name?")

    # Be flexible with return types since agents may return different formats
    assert first is not None
    assert second is not None
    return {"test_name": "test_agent_memory_persistence", "status": "passed", "response": "Memory persistence tested"}
|
|
|
|
# --- Error Handling Tests ---
|
|
|
|
def test_error_handling():
    """Test agent error handling with invalid inputs"""
    probe_agent = create_test_agent("ErrorTestAgent")

    try:
        # Test with empty task
        assert probe_agent.run("") is not None

        # Test with very long task
        assert probe_agent.run("A" * 10000) is not None

        return {"test_name": "test_error_handling", "status": "passed", "response": "Error handling tests passed"}
    except Exception as e:
        return {"test_name": "test_error_handling", "status": "failed", "error": str(e)}
|
|
|
|
# --- Integration Tests ---
|
|
|
|
def test_complex_workflow_integration():
    """Test complex multi-agent workflow integration"""
    # Create specialized agents
    researcher = create_test_agent("Researcher", "You research topics thoroughly.")
    analyst = create_test_agent("Analyst", "You analyze research data.")
    writer = create_test_agent("Writer", "You write clear summaries.")

    try:
        # Test different workflow combinations
        seq_flow = SequentialWorkflow(agents=[researcher, analyst, writer], max_loops=1)
        conc_flow = ConcurrentWorkflow(agents=[researcher, analyst], max_loops=1)

        seq_out = seq_flow.run("Research AI trends, analyze them, and write a summary.")
        conc_out = conc_flow.run("What are the benefits and challenges of AI?")

        # Both workflows return lists of conversation messages
        for out in (seq_out, conc_out):
            assert out is not None
            assert isinstance(out, list) and len(out) > 0
        return {"test_name": "test_complex_workflow_integration", "status": "passed", "response": "Complex workflow integration completed"}
    except Exception as e:
        logger.error(f"Complex workflow integration test failed: {e}")
        return {"test_name": "test_complex_workflow_integration", "status": "failed", "error": str(e)}
|
|
|
|
# --- Test Orchestrator ---
|
|
|
|
def run_all_tests():
    """Run all enabled tests, write a markdown report, and exit non-zero on failure.

    Each test function either returns a result dict (which may itself carry
    ``status: "failed"`` for tests that trap their own exceptions) or raises,
    in which case a synthetic failure record is appended. Fixes: a test that
    *returned* a failed status used to be logged as PASSED; the summary also
    used to divide by zero when no tests were enabled. Calls ``sys.exit(1)``
    (requires ``import sys``) instead of the ``site``-provided ``exit``.
    """
    logger.info("Starting Enhanced Swarms Comprehensive Test Suite")

    tests_to_run = [
        # Basic Tests
        # test_basic_agent_functionality,
        # test_agent_with_custom_prompt,
        # test_tool_execution_with_agent,

        # # Multi-Modal Tests
        # test_multimodal_execution,
        # test_multiple_image_execution,

        # # Workflow Tests
        # test_sequential_workflow,
        # test_concurrent_workflow,

        # # Advanced Swarm Tests
        # test_agent_rearrange,
        # test_mixture_of_agents,
        test_spreadsheet_swarm,
        test_hierarchical_swarm,
        # test_majority_voting,
        # test_round_robin_swarm,
        # test_swarm_router,

        # # Performance & Features
        # test_streaming_mode,
        # test_agent_memory_persistence,
        # test_error_handling,

        # # Integration Tests
        # test_complex_workflow_integration,
    ]

    results = []
    for test_func in tests_to_run:
        test_name = test_func.__name__
        try:
            logger.info(f"Running test: {test_name}...")
            result = test_func()
            results.append(result)
            # Honor the status reported by the test itself: some tests trap
            # their own exceptions and return status "failed".
            if result.get("status") == "passed":
                logger.info(f"Test {test_name} PASSED.")
            else:
                logger.error(f"Test {test_name} FAILED: {result.get('error')}")
        except Exception as e:
            logger.error(f"Test {test_name} FAILED: {e}")
            error_details = {
                "test_name": test_name,
                "status": "failed",
                "error": str(e),
                "response": "Test execution failed",
            }
            results.append(error_details)
            # create_github_issue(error_details)  # Uncomment to enable GitHub issue creation

    timestamp = generate_timestamp()
    write_markdown_report(results, f"comprehensive_test_report_{timestamp}")

    # Summary
    total_tests = len(results)
    passed_tests = sum(1 for r in results if r['status'] == 'passed')
    failed_tests = total_tests - passed_tests
    # Avoid ZeroDivisionError if every test is commented out.
    success_rate = (passed_tests / total_tests) * 100 if total_tests else 0.0

    logger.info(f"Test Summary: {passed_tests}/{total_tests} passed ({success_rate:.1f}%)")

    if failed_tests > 0:
        logger.error(f"{failed_tests} tests failed. Check the report and logs.")
        sys.exit(1)
    else:
        logger.success("All tests passed successfully!")
|
|
|
|
if __name__ == "__main__":
    # Fail fast when the API key is missing — every test requires it.
    # sys.exit is the explicit, always-available form (requires `import sys`);
    # the bare `exit` builtin is only injected by the `site` module.
    if not API_KEY:
        logger.error("OPENAI_API_KEY environment variable not set. Aborting tests.")
        sys.exit(1)
    else:
        run_all_tests()
|