[FEAT][GitHub Actions] Add comprehensive testing workflow and test suite

5 days ago · f3e26623ae
parent 73dca28bd5
commit f3e26623ae
2 changed files with 378 additions and 0 deletions
--- a/.github/workflows/comprehensive_tests.yml
+++ b/.github/workflows/comprehensive_tests.yml
@ -0,0 +1,89 @@
+# .github/workflows/comprehensive_tests.yml
+
+name: Swarms Comprehensive Tests
+
+# This workflow triggers on pushes and pull requests to the master branch.
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # You can test against multiple Python versions here if needed.
+        python-version: ["3.10"]
+
+    steps:
+    # Step 1: Check out the code.
+    # For pull requests, this action automatically checks out the code
+    # from the PR's branch, not the master branch. This is the key
+    # to testing the proposed changes.
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # Step 2: Set up the specified Python version.
+    # The step id is required: the cache key in Step 4 reads
+    # steps.setup-python.outputs.python-version, which is empty without it.
+    - name: Set up Python ${{ matrix.python-version }}
+      id: setup-python
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    # Step 3: Install Poetry for dependency management.
+    # The virtualenv is created inside the project (.venv) so it can be cached.
+    - name: Install Poetry
+      uses: snok/install-poetry@v1
+      with:
+        virtualenvs-create: true
+        virtualenvs-in-project: true
+
+    # Step 4: Cache dependencies to speed up subsequent runs.
+    # The key combines the OS, the exact Python version and a hash of the
+    # dependency files. pyproject.toml is hashed as well so the key still
+    # changes when no poetry.lock is committed.
+    - name: Load cached venv
+      id: cached-poetry-dependencies
+      uses: actions/cache@v4
+      with:
+        path: .venv
+        key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock', '**/pyproject.toml') }}
+
+    # Step 5: Install dependencies and the project package itself.
+    # 'poetry install' installs all dependencies and the 'swarms' package
+    # from the checked-out code into the virtual environment.
+    # NOTE: this step is skipped on a cache hit; the cached .venv is then
+    # reused as-is.
+    - name: Install dependencies
+      if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+      run: poetry install --no-interaction --with dev --all-extras
+
+    # Step 6: Create dummy image files required for multi-modal tests.
+    # The redirection creates each file, so no separate 'touch' is needed.
+    # NOTE(review): these files contain plain text, not valid image data;
+    # confirm the multi-modal code paths under test accept them.
+    - name: Create dummy image files for testing
+      run: |
+        mkdir -p tests/test_data
+        echo "dummy image data" > tests/test_data/image1.jpg
+        echo "dummy image data" > tests/test_data/image2.png
+
+    # Step 7: Run the comprehensive test suite.
+    # 'poetry run' executes the command within the virtual environment,
+    # ensuring that when 'tests/comprehensive_test.py' imports 'swarms',
+    # it's importing the code from the pull request.
+    - name: Run Comprehensive Test Suite
+      env:
+        # Securely pass API keys and other secrets to the test environment.
+        # These must be configured in your repository's secrets.
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        # GITHUB_REPO_OWNER: "kyegomez"
+        # GITHUB_REPO_NAME: "swarms"
+      run: |
+        poetry run python tests/comprehensive_test.py
+
+    # Step 8: Upload the generated test report as an artifact.
+    # This happens even if the previous steps fail, allowing you to debug.
+    - name: Upload Test Report
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: test-report-${{ matrix.python-version }}
+        path: test_runs/
--- a/tests/comprehensive_test.py
+++ b/tests/comprehensive_test.py
@ -0,0 +1,289 @@
+import os
+import json
+import time
+from datetime import datetime
+from typing import List, Dict, Any, Callable
+
+import requests
+from dotenv import load_dotenv
+
+# Basic Imports for Swarms
+from swarms.structs import Agent, SequentialWorkflow, ConcurrentWorkflow
+from swarms.tools.base_tool import BaseTool
+
+# Setup Logging
+# Adds a file sink under test_runs/ that records messages at ERROR level,
+# rotating the file once it reaches 10 MB.
+from loguru import logger
+logger.add("test_runs/test_failures.log", rotation="10 MB", level="ERROR")
+
+# Load environment variables
+# (python-dotenv; lets a local .env file supply OPENAI_API_KEY.)
+load_dotenv()
+
+# --- Constants and Configuration ---
+# Read once at import time; the __main__ guard refuses to run without it.
+API_KEY = os.getenv("OPENAI_API_KEY")
+# Disabled configuration for the commented-out create_github_issue helper.
+# GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+# GITHUB_REPO_OWNER = os.getenv("GITHUB_REPO_OWNER", "kyegomez")
+# GITHUB_REPO_NAME = os.getenv("GITHUB_REPO_NAME", "swarms")
+# BASE_URL = "https://api.github.com"
+# GITHUB_HEADERS = {
+#     "Authorization": f"token {GITHUB_TOKEN}",
+#     "Accept": "application/vnd.github.v3+json",
+# }
+
+# --- Helper Functions ---
+
+def generate_timestamp() -> str:
+    """Return the current local time as a filename-safe string.
+
+    The format is ``YYYYMMDD_HHMMSS``.
+    """
+    now = datetime.now()
+    return f"{now:%Y%m%d_%H%M%S}"
+
+def write_markdown_report(results: List[Dict[str, Any]], filename: str):
+    """Write test results to a markdown file.
+
+    The report is written to ``test_runs/<filename>.md``; the directory is
+    created if it does not exist yet.
+
+    Args:
+        results: One dict per test. ``test_name`` and ``status`` are required;
+            ``response`` and ``error`` are rendered when present and truthy.
+        filename: Report file name without the ``.md`` extension.
+
+    Raises:
+        KeyError: If a result lacks ``test_name`` or ``status``.
+    """
+    # exist_ok avoids the check-then-create race of a separate exists() test.
+    os.makedirs("test_runs", exist_ok=True)
+
+    total = len(results)
+    passed = sum(1 for r in results if r["status"] == "passed")
+    failed = total - passed
+    # An empty result list would otherwise divide by zero.
+    success_rate = (passed / total) * 100 if total else 0.0
+
+    # Model responses may contain non-ASCII text, so pin the encoding.
+    with open(f"test_runs/{filename}.md", "w", encoding="utf-8") as f:
+        f.write("# Swarms Comprehensive Test Report\n\n")
+        f.write(f"Test Run: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
+
+        f.write("## Summary\n\n")
+        f.write(f"- **Total Tests:** {total}\n")
+        f.write(f"- **Passed:** {passed}\n")
+        f.write(f"- **Failed:** {failed}\n")
+        f.write(f"- **Success Rate:** {success_rate:.2f}%\n\n")
+
+        f.write("## Detailed Results\n\n")
+        for result in results:
+            f.write(f"### {result['test_name']}\n\n")
+            f.write(f"**Status:** {result['status'].upper()}\n\n")
+            if result.get("response"):
+                # Use triple backticks for json code block
+                f.write("Response:\n```json\n")
+                response_str = result["response"]
+                try:
+                    # Parse strings as JSON so they can be pretty-printed;
+                    # other objects are dumped directly.
+                    response_json = json.loads(response_str) if isinstance(response_str, str) else response_str
+                    f.write(json.dumps(response_json, indent=2))
+                except (json.JSONDecodeError, TypeError):
+                    # Not JSON (or not serializable): fall back to plain text.
+                    f.write(str(response_str))
+                f.write("\n```\n\n")
+
+            if result.get("error"):
+                f.write(f"**Error:**\n```\n{result['error']}\n```\n\n")
+            f.write("---\n\n")
+
+# def create_github_issue(test_result: Dict[str, Any]) -> Dict[str, Any]:
+#     """Create a GitHub issue for a failed test"""
+#     if not all([GITHUB_TOKEN, GITHUB_REPO_OWNER, GITHUB_REPO_NAME]):
+#         logger.warning("GitHub credentials not configured. Skipping issue creation.")
+#         return None
+
+#     if test_result["status"] != "failed":
+#         return None
+
+#     issue_title = f"Automated Test Failure: {test_result['test_name']}"
+    
+#     issue_body = f"""
+# ## Test Failure Report
+
+# - **Test Name**: `{test_result['test_name']}`
+# - **Timestamp**: `{datetime.now().isoformat()}`
+# - **Status**: {test_result['status']}
+
+# ### Error Information
+# ```
+# {test_result.get('error', 'No error message available')}
+# ```
+
+# ### Response (if available)
+# ```json
+# {json.dumps(test_result.get('response', {}), indent=2)}
+# ```
+
+# ---
+# *This issue was automatically generated by the Swarms testing workflow.*
+# """
+
+#     payload = {
+#         "title": issue_title,
+#         "body": issue_body,
+#         "labels": ["bug", "test-failure", "automated-report"],
+#     }
+
+#     try:
+#         response = requests.post(
+#             f"{BASE_URL}/repos/{GITHUB_REPO_OWNER}/{GITHUB_REPO_NAME}/issues",
+#             headers=GITHUB_HEADERS,
+#             json=payload,
+#         )
+#         response.raise_for_status()
+#         logger.info(f"Created GitHub issue for {test_result['test_name']}")
+#         return response.json()
+#     except requests.exceptions.RequestException as e:
+#         logger.error(f"Failed to create GitHub issue: {e.response.text if e.response else str(e)}")
+#         return None
+
+# --- Test Cases ---
+
+def test_tool_execution_with_agent():
+    """Check that an agent equipped with a calculator tool reports the sum.
+
+    Returns:
+        A result dict (``test_name``, ``status``, ``response``) for the report.
+
+    Raises:
+        AssertionError: If "12" does not appear in the agent's output.
+    """
+    def simple_calculator(a: int, b: int) -> int:
+        """A simple tool to add two numbers."""
+        return a + b
+
+    # NOTE(review): `llm` is given a model-name string here; confirm that
+    # Agent accepts a string for this parameter.
+    agent_config = dict(
+        agent_name="CalculatorAgent",
+        system_prompt="You are an agent that uses a calculator tool.",
+        llm="gpt-4o",
+        max_loops=1,
+        tools=[simple_calculator],
+        output_type="str-all-except-first",
+    )
+    calculator_agent = Agent(**agent_config)
+
+    output = calculator_agent.run("Use the calculator to add 5 and 7.")
+
+    # Finding '12' in the output is only indirect evidence that the tool was
+    # called; inspecting execution logs would be a stronger check.
+    assert "12" in output
+    return dict(
+        test_name="test_tool_execution_with_agent",
+        status="passed",
+        response=output,
+    )
+
+def test_multimodal_execution():
+    """Check that a multi-modal agent returns text for a single image.
+
+    Returns:
+        A result dict (``test_name``, ``status``, ``response``) for the report.
+
+    Raises:
+        AssertionError: If the agent's output is not a non-empty string.
+    """
+    # The workflow's "Create dummy image files" step provides this file.
+    image_path = "tests/test_data/image1.jpg"
+
+    vision_agent = Agent(
+        agent_name="VisionAgent",
+        system_prompt="You are an agent that describes images.",
+        llm="gpt-4o",
+        max_loops=1,
+        multi_modal=True,
+    )
+    description = vision_agent.run("Describe this image.", img=image_path)
+
+    assert isinstance(description, str)
+    assert len(description) > 0
+    return dict(
+        test_name="test_multimodal_execution",
+        status="passed",
+        response="Response received",
+    )
+
+def test_multiple_image_execution():
+    """Check that a multi-modal agent returns one result per input image.
+
+    Returns:
+        A result dict (``test_name``, ``status``, ``response``) for the report.
+
+    Raises:
+        AssertionError: If the output is not a list of exactly two items.
+    """
+    # The workflow's "Create dummy image files" step provides these files.
+    image_paths = [
+        "tests/test_data/image1.jpg",
+        "tests/test_data/image2.png",
+    ]
+
+    multi_vision_agent = Agent(
+        agent_name="MultiVisionAgent",
+        system_prompt="You are an agent that describes multiple images.",
+        llm="gpt-4o",
+        max_loops=1,
+        multi_modal=True,
+    )
+    descriptions = multi_vision_agent.run_multiple_images(
+        "Describe these two images.", imgs=image_paths
+    )
+
+    assert isinstance(descriptions, list)
+    assert len(descriptions) == 2
+    return dict(
+        test_name="test_multiple_image_execution",
+        status="passed",
+        response="Responses received for both images",
+    )
+
+def test_concurrent_workflow():
+    """Check that ConcurrentWorkflow returns one entry per agent.
+
+    Returns:
+        A result dict (``test_name``, ``status``, ``response``) for the report.
+
+    Raises:
+        AssertionError: If the workflow output is not a dict with two entries.
+    """
+    workers = [
+        Agent(agent_name=name, llm="gpt-4o", max_loops=1)
+        for name in ("Agent1", "Agent2")
+    ]
+    concurrent = ConcurrentWorkflow(agents=workers, max_loops=1)
+
+    outputs = concurrent.run("What are two different famous quotes?")
+
+    assert isinstance(outputs, dict)
+    assert len(outputs) == 2
+    return dict(
+        test_name="test_concurrent_workflow",
+        status="passed",
+        response=outputs,
+    )
+
+def test_sequential_workflow():
+    """Check that SequentialWorkflow produces a non-empty final answer.
+
+    The first agent is prompted to generate a quote and the second to explain it.
+
+    Returns:
+        A result dict (``test_name``, ``status``, ``response``) for the report.
+
+    Raises:
+        AssertionError: If the workflow output is not a non-empty string.
+    """
+    stages = (
+        ("Agent1", "Generate a famous quote."),
+        ("Agent2", "Explain the meaning of the provided quote."),
+    )
+    pipeline_agents = [
+        Agent(agent_name=name, system_prompt=prompt, llm="gpt-4o", max_loops=1)
+        for name, prompt in stages
+    ]
+    pipeline = SequentialWorkflow(
+        agents=pipeline_agents, max_loops=1, output_type="final"
+    )
+
+    final_output = pipeline.run("Start by generating a quote, then explain it.")
+
+    assert isinstance(final_output, str)
+    assert len(final_output) > 0
+    return dict(
+        test_name="test_sequential_workflow",
+        status="passed",
+        response=final_output,
+    )
+
+def test_streaming_and_non_streaming():
+    """Tests both streaming and non-streaming modes.
+
+    Returns:
+        A result dict (``test_name``, ``status``, ``response``) for the report.
+
+    Raises:
+        AssertionError: If the non-streaming output is not a string, or if no
+            text could be collected from the streamed chunks.
+    """
+    # Non-streaming
+    non_streaming_agent = Agent(agent_name="NonStreamer", llm="gpt-4o", max_loops=1, streaming_on=False)
+    non_streaming_response = non_streaming_agent.run("Tell me a short story.")
+    assert isinstance(non_streaming_response, str)
+
+    # Streaming
+    # NOTE(review): this assumes run() yields dict chunks shaped like
+    # {"choices": [{"delta": {"content": ...}}]} when streaming_on=True;
+    # confirm against the Agent implementation.
+    streaming_agent = Agent(agent_name="Streamer", llm="gpt-4o", max_loops=1, streaming_on=True)
+    streaming_response_generator = streaming_agent.run("Tell me a short story.")
+
+    pieces = []
+    for chunk in streaming_response_generator:
+        if not isinstance(chunk, dict):
+            # Handle potential other chunk formats if necessary
+            continue
+        # Chunks may carry an empty 'choices' list or a delta without
+        # 'content'; skip those instead of raising KeyError/IndexError.
+        choices = chunk.get("choices") or []
+        if not choices:
+            continue
+        delta = choices[0].get("delta") or {}
+        content = delta.get("content")
+        if content:
+            pieces.append(content)
+    full_response = "".join(pieces)
+
+    assert isinstance(full_response, str) and len(full_response) > 0
+    return {"test_name": "test_streaming_and_non_streaming", "status": "passed", "response": "Both modes executed."}
+
+
+# --- Test Orchestrator ---
+
+def run_all_tests():
+    """Run all tests, write a markdown report, and exit non-zero on failure.
+
+    Each test function is called in turn. A test that raises is recorded as
+    failed and the run continues with the next test. The report is written
+    with a timestamped file name once all tests have run.
+
+    Raises:
+        SystemExit: With exit code 1 when at least one test failed.
+    """
+    logger.info("Starting Swarms Comprehensive Test Suite")
+
+    tests_to_run = [
+        test_tool_execution_with_agent,
+        test_multimodal_execution,
+        test_multiple_image_execution,
+        test_concurrent_workflow,
+        test_sequential_workflow,
+        test_streaming_and_non_streaming,
+    ]
+
+    results = []
+    for test_func in tests_to_run:
+        test_name = test_func.__name__
+        logger.info(f"Running test: {test_name}...")
+        try:
+            result = test_func()
+        except Exception as e:
+            # A bare ``assert`` raises AssertionError with an empty message.
+            # Include the exception type so the report entry is never blank.
+            detail = str(e)
+            error_text = f"{type(e).__name__}: {detail}" if detail else type(e).__name__
+            # logger.exception also records the traceback in the log sinks.
+            logger.exception(f"Test {test_name} FAILED: {error_text}")
+            error_details = {
+                "test_name": test_name,
+                "status": "failed",
+                "error": error_text,
+                "response": "Test execution failed"
+            }
+            results.append(error_details)
+            # create_github_issue(error_details)
+        else:
+            results.append(result)
+            logger.info(f"Test {test_name} PASSED.")
+
+    timestamp = generate_timestamp()
+    write_markdown_report(results, f"test_report_{timestamp}")
+
+    # Check for failures and exit with a non-zero code if any test failed.
+    # SystemExit is raised directly because the ``exit`` helper is only
+    # installed by the ``site`` module and is not guaranteed to exist.
+    if any(r['status'] == 'failed' for r in results):
+        logger.error("One or more tests failed. Check the report and logs.")
+        raise SystemExit(1)
+    logger.success("All tests passed successfully!")
+
+if __name__ == "__main__":
+    # Exit non-zero when the key is missing so the run is reported as failed
+    # instead of finishing green without having executed any test.
+    if not API_KEY:
+        logger.error("OPENAI_API_KEY environment variable not set. Aborting tests.")
+        raise SystemExit(1)
+    run_all_tests()