parent 73dca28bd5
commit f3e26623ae
@@ -0,0 +1,89 @@
# .github/workflows/comprehensive_tests.yml

name: Swarms Comprehensive Tests

# This workflow triggers on pushes and pull requests to the master branch.
on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # You can test against multiple Python versions here if needed.
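        # e.g. python-version: ["3.10", "3.11", "3.12"]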
        python-version: ["3.10"]

    steps:
      # Step 1: Check out the code.
      # For pull requests, this action automatically checks out the code
      # from the PR's branch, not the master branch. This is the key
      # to testing the proposed changes.
      - name: Checkout repository
        uses: actions/checkout@v4

      # Step 2: Set up the specified Python version.
      # The step id lets later steps (the cache key below) reference this step's outputs.
      - name: Set up Python ${{ matrix.python-version }}
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Step 3: Install Poetry for dependency management.
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true

      # Step 4: Cache dependencies to speed up subsequent runs.
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
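          # The key combines the runner OS, the Python version reported by the setup-python step,
          # and the poetry.lock hash, so the cached venv is rebuilt whenever any of them change.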
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      # Step 5: Install dependencies and the project package itself.
      # This is the crucial step. 'poetry install' will install all dependencies
      # and also install the 'swarms' package from the checked-out PR code
      # in editable mode within the virtual environment.
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
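        # '--with dev' installs the dev dependency group; '--all-extras' installs every optional extra.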
        run: poetry install --no-interaction --with dev --all-extras

      # Step 6: Create dummy image files required for multi-modal tests.
      # This ensures your tests are self-contained.
      - name: Create dummy image files for testing
        run: |
          mkdir -p tests/test_data
          touch tests/test_data/image1.jpg
          touch tests/test_data/image2.png
          echo "dummy image data" > tests/test_data/image1.jpg
          echo "dummy image data" > tests/test_data/image2.png

      # Step 7: Run the comprehensive test suite.
      # 'poetry run' executes the command within the virtual environment,
      # ensuring that when 'tests/comprehensive_test.py' imports 'swarms',
      # it's importing the code from the pull request.
      - name: Run Comprehensive Test Suite
        env:
          # Securely pass API keys and other secrets to the test environment.
          # These must be configured in your repository's secrets.
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # GITHUB_REPO_OWNER: "kyegomez"
          # GITHUB_REPO_NAME: "swarms"
        run: |
          poetry run python tests/comprehensive_test.py

      # Step 8: Upload the generated test report as an artifact.
      # This happens even if the previous steps fail, allowing you to debug.
      - name: Upload Test Report
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: test-report-${{ matrix.python-version }}
          path: test_runs/

@@ -0,0 +1,289 @@
import os
import json
import time
from datetime import datetime
from typing import List, Dict, Any, Callable

import requests
from dotenv import load_dotenv

# Basic Imports for Swarms
from swarms.structs import Agent, SequentialWorkflow, ConcurrentWorkflow
from swarms.tools.base_tool import BaseTool

# Setup Logging
from loguru import logger
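# Note: Loguru creates missing parent directories for file sinks, so "test_runs/" need not exist yet.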
logger.add("test_runs/test_failures.log", rotation="10 MB", level="ERROR")

# Load environment variables
load_dotenv()

# --- Constants and Configuration ---
API_KEY = os.getenv("OPENAI_API_KEY")
# GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
# GITHUB_REPO_OWNER = os.getenv("GITHUB_REPO_OWNER", "kyegomez")
# GITHUB_REPO_NAME = os.getenv("GITHUB_REPO_NAME", "swarms")
# BASE_URL = "https://api.github.com"
# GITHUB_HEADERS = {
#     "Authorization": f"token {GITHUB_TOKEN}",
#     "Accept": "application/vnd.github.v3+json",
# }

# --- Helper Functions ---

def generate_timestamp() -> str:
    """Generate a timestamp string for filenames"""
    return datetime.now().strftime("%Y%m%d_%H%M%S")

def write_markdown_report(results: List[Dict[str, Any]], filename: str):
    """Write test results to a markdown file"""
    if not os.path.exists("test_runs"):
        os.makedirs("test_runs")

    with open(f"test_runs/{filename}.md", "w") as f:
        f.write("# Swarms Comprehensive Test Report\n\n")
        f.write(f"Test Run: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        total = len(results)
        passed = sum(1 for r in results if r["status"] == "passed")
        failed = total - passed

        f.write("## Summary\n\n")
        f.write(f"- **Total Tests:** {total}\n")
        f.write(f"- **Passed:** {passed}\n")
        f.write(f"- **Failed:** {failed}\n")
        f.write(f"- **Success Rate:** {(passed/total)*100:.2f}%\n\n")

        f.write("## Detailed Results\n\n")
        for result in results:
            f.write(f"### {result['test_name']}\n\n")
            f.write(f"**Status:** {result['status'].upper()}\n\n")
            if result.get("response"):
                # Use triple backticks for a json code block
                f.write("Response:\n```json\n")
                # Ensure response is a string, then attempt to pretty-print it if it's JSON
                response_str = result["response"]
                try:
                    # Try to parse and re-dump for pretty printing
                    response_json = json.loads(response_str) if isinstance(response_str, str) else response_str
                    f.write(json.dumps(response_json, indent=2))
                except (json.JSONDecodeError, TypeError):
                    f.write(str(response_str))
                f.write("\n```\n\n")

            if result.get("error"):
                f.write(f"**Error:**\n```\n{result['error']}\n```\n\n")
            f.write("---\n\n")

# def create_github_issue(test_result: Dict[str, Any]) -> Dict[str, Any]:
#     """Create a GitHub issue for a failed test"""
#     if not all([GITHUB_TOKEN, GITHUB_REPO_OWNER, GITHUB_REPO_NAME]):
#         logger.warning("GitHub credentials not configured. Skipping issue creation.")
#         return None

#     if test_result["status"] != "failed":
#         return None

#     issue_title = f"Automated Test Failure: {test_result['test_name']}"

#     issue_body = f"""
# ## Test Failure Report

# - **Test Name**: `{test_result['test_name']}`
# - **Timestamp**: `{datetime.now().isoformat()}`
# - **Status**: {test_result['status']}

# ### Error Information
# ```
# {test_result.get('error', 'No error message available')}
# ```

# ### Response (if available)
# ```json
# {json.dumps(test_result.get('response', {}), indent=2)}
# ```

# ---
# *This issue was automatically generated by the Swarms testing workflow.*
# """

#     payload = {
#         "title": issue_title,
#         "body": issue_body,
#         "labels": ["bug", "test-failure", "automated-report"],
#     }

#     try:
#         response = requests.post(
#             f"{BASE_URL}/repos/{GITHUB_REPO_OWNER}/{GITHUB_REPO_NAME}/issues",
#             headers=GITHUB_HEADERS,
#             json=payload,
#         )
#         response.raise_for_status()
#         logger.info(f"Created GitHub issue for {test_result['test_name']}")
#         return response.json()
#     except requests.exceptions.RequestException as e:
#         logger.error(f"Failed to create GitHub issue: {e.response.text if e.response else str(e)}")
#         return None

# --- Test Cases ---

def test_tool_execution_with_agent():
    """Tests an agent's ability to use a provided tool."""
    def simple_calculator(a: int, b: int) -> int:
        """A simple tool to add two numbers."""
        return a + b

    agent = Agent(
        agent_name="CalculatorAgent",
        system_prompt="You are an agent that uses a calculator tool.",
        llm="gpt-4o",
        max_loops=1,
        tools=[simple_calculator],
        output_type="str-all-except-first"
    )

    task = "Use the calculator to add 5 and 7."
    response = agent.run(task)

    # Check if the agent's output contains the expected result '12'.
    # This is an indirect way to verify tool use. A more robust test would
    # involve checking execution logs if the framework supports it.
    assert "12" in response
    return {"test_name": "test_tool_execution_with_agent", "status": "passed", "response": response}

def test_multimodal_execution():
    """Tests an agent's ability to process a single image."""
    agent = Agent(
        agent_name="VisionAgent",
        system_prompt="You are an agent that describes images.",
        llm="gpt-4o",
        max_loops=1,
        multi_modal=True
    )

    task = "Describe this image."
    # Assumes a dummy image file is created by the GitHub Action
    image_path = "tests/test_data/image1.jpg"
    response = agent.run(task, img=image_path)

    assert isinstance(response, str) and len(response) > 0
    return {"test_name": "test_multimodal_execution", "status": "passed", "response": "Response received"}

def test_multiple_image_execution():
    """Tests an agent's ability to process multiple images."""
    agent = Agent(
        agent_name="MultiVisionAgent",
        system_prompt="You are an agent that describes multiple images.",
        llm="gpt-4o",
        max_loops=1,
        multi_modal=True
    )

    task = "Describe these two images."
    # Assumes dummy image files are created by the GitHub Action
    image_paths = ["tests/test_data/image1.jpg", "tests/test_data/image2.png"]
    response = agent.run_multiple_images(task, imgs=image_paths)

    assert isinstance(response, list) and len(response) == 2
    return {"test_name": "test_multiple_image_execution", "status": "passed", "response": "Responses received for both images"}

def test_concurrent_workflow():
    """Tests the ConcurrentWorkflow with multiple agents."""
    agents = [
        Agent(agent_name="Agent1", llm="gpt-4o", max_loops=1),
        Agent(agent_name="Agent2", llm="gpt-4o", max_loops=1)
    ]
    workflow = ConcurrentWorkflow(agents=agents, max_loops=1)

    task = "What are two different famous quotes?"
    response = workflow.run(task)

    assert isinstance(response, dict) and len(response) == 2
    return {"test_name": "test_concurrent_workflow", "status": "passed", "response": response}

def test_sequential_workflow():
    """Tests the SequentialWorkflow with multiple agents."""
    agents = [
        Agent(agent_name="Agent1", system_prompt="Generate a famous quote.", llm="gpt-4o", max_loops=1),
        Agent(agent_name="Agent2", system_prompt="Explain the meaning of the provided quote.", llm="gpt-4o", max_loops=1)
    ]
    workflow = SequentialWorkflow(agents=agents, max_loops=1, output_type="final")

    task = "Start by generating a quote, then explain it."
    response = workflow.run(task)

    assert isinstance(response, str) and len(response) > 0
    return {"test_name": "test_sequential_workflow", "status": "passed", "response": response}

def test_streaming_and_non_streaming():
    """Tests both streaming and non-streaming modes."""
    # Non-streaming
    non_streaming_agent = Agent(agent_name="NonStreamer", llm="gpt-4o", max_loops=1, streaming_on=False)
    non_streaming_response = non_streaming_agent.run("Tell me a short story.")
    assert isinstance(non_streaming_response, str)

    # Streaming
    streaming_agent = Agent(agent_name="Streamer", llm="gpt-4o", max_loops=1, streaming_on=True)
    streaming_response_generator = streaming_agent.run("Tell me a short story.")

    full_response = ""
    for chunk in streaming_response_generator:
        # Check the structure of the chunk from the litellm stream; use .get() so a malformed chunk cannot raise KeyError.
        if isinstance(chunk, dict) and chunk.get('choices'):
            delta_content = chunk['choices'][0].get('delta', {}).get('content')
            if delta_content:
                full_response += delta_content
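        elif isinstance(chunk, str):
            # Assumption: some configurations may stream plain text chunks instead of
            # litellm-style dicts; append those directly.
            full_response += chunk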
        # Handle potential other chunk formats if necessary

    assert isinstance(full_response, str) and len(full_response) > 0
    return {"test_name": "test_streaming_and_non_streaming", "status": "passed", "response": "Both modes executed."}


# --- Test Orchestrator ---

def run_all_tests():
    """Run all tests and generate a report"""
    logger.info("Starting Swarms Comprehensive Test Suite")

    tests_to_run = [
        test_tool_execution_with_agent,
        test_multimodal_execution,
        test_multiple_image_execution,
        test_concurrent_workflow,
        test_sequential_workflow,
        test_streaming_and_non_streaming,
    ]
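    # To include a new test in the run and the report, simply append the function to this list.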

    results = []
    for test_func in tests_to_run:
        test_name = test_func.__name__
        try:
            logger.info(f"Running test: {test_name}...")
            result = test_func()
            results.append(result)
            logger.info(f"Test {test_name} PASSED.")
        except Exception as e:
            logger.error(f"Test {test_name} FAILED: {e}")
            error_details = {
                "test_name": test_name,
                "status": "failed",
                "error": str(e),
                "response": "Test execution failed"
            }
            results.append(error_details)
            # create_github_issue(error_details)

    timestamp = generate_timestamp()
    write_markdown_report(results, f"test_report_{timestamp}")

    # Check for failures and exit with a non-zero code if any test failed
    if any(r['status'] == 'failed' for r in results):
        logger.error("One or more tests failed. Check the report and logs.")
        exit(1)
    else:
        logger.success("All tests passed successfully!")

if __name__ == "__main__":
    if not API_KEY:
        logger.error("OPENAI_API_KEY environment variable not set. Aborting tests.")
        # Exit non-zero so the CI job fails loudly instead of passing without running any tests.
        exit(1)
    else:
        run_all_tests()