From 6f4803ef0ebc9c4e181b7ec3ef1c4f1f13064897 Mon Sep 17 00:00:00 2001
From: Kye Gomez <kye@swarms.world>
Date: Tue, 21 Oct 2025 17:52:11 -0700
Subject: [PATCH] [DELELTE OLD TESTS] [Tests][Agent] [Updated][Tests][Structs]
 [Agent Improvement][Fallback method] [N feature n samples]

---
 .github/workflows/test-main-features.yml      |  150 +
 .../utils/network_management_example.py       |    2 +-
 .../negotiation_algorithm_example.py          |    1 -
 .../swarm_intelligence_algorithm_example.py   |    1 -
 swarms/prompts/xray_swarm_prompt.py           |  114 +-
 swarms/structs/agent.py                       |  170 +-
 tests/agent/agents/test_agent_logging.py      |  114 -
 .../agents/test_create_agents_from_yaml.py    |  267 --
 tests/agent/agents/test_llm_args.py           |  184 -
 tests/agent/agents/test_llm_handling_args.py  |   62 -
 tests/agent/agents/test_tool_agent.py         |  230 -
 .../test_agent_benchmark_init.py              |  171 -
 .../test_agent_exec_benchmark.py              |  284 --
 .../benchmark_agent/test_auto_test_eval.py    |  318 --
 .../test_github_summarizer_agent.py           |  180 -
 .../benchmark_agent/test_profiling_agent.py   |   46 -
 tests/requirements.txt                        |   12 +-
 tests/structs/test_agent.py                   | 3693 +++++++++++------
 tests/structs/test_agent_features.py          |  600 ---
 tests/structs/test_agentrearrange.py          |  328 --
 tests/structs/test_airflow_swarm.py           |  313 --
 tests/structs/test_auto_swarm_builder_fix.py  |  293 --
 tests/structs/test_auto_swarms_builder.py     |  276 +-
 tests/structs/test_base_workflow.py           |   67 -
 .../structs/test_board_of_directors_swarm.py  | 1422 +------
 tests/structs/test_concurrent_workflow.py     |  347 +-
 .../test_graph_workflow_comprehensive.py      | 1222 +-----
 tests/structs/test_hierarchical_swarm.py      |  328 ++
 tests/{ => structs}/test_main_features.py     |    1 +
 tests/structs/test_majority_voting.py         |  272 +-
 tests/structs/test_moa.py                     |  316 +-
 tests/structs/test_multi_agent_collab.py      |  201 -
 tests/structs/test_recursive_workflow.py      |   74 -
 tests/structs/test_sequential_workflow.py     |  544 ++-
 tests/utils/test_display_markdown_message.py  |   67 -
 tests/utils/test_docstring_parser.py          |   10 +-
 .../test_litellm_args_kwargs.py               |    0
 tests/utils/test_math_eval.py                 |   41 -
 tests/utils/test_metrics_decorator.py         |   88 -
 39 files changed, 4713 insertions(+), 8096 deletions(-)
 create mode 100644 .github/workflows/test-main-features.yml
 delete mode 100644 tests/agent/agents/test_agent_logging.py
 delete mode 100644 tests/agent/agents/test_create_agents_from_yaml.py
 delete mode 100644 tests/agent/agents/test_llm_args.py
 delete mode 100644 tests/agent/agents/test_llm_handling_args.py
 delete mode 100644 tests/agent/agents/test_tool_agent.py
 delete mode 100644 tests/agent/benchmark_agent/test_agent_benchmark_init.py
 delete mode 100644 tests/agent/benchmark_agent/test_agent_exec_benchmark.py
 delete mode 100644 tests/agent/benchmark_agent/test_auto_test_eval.py
 delete mode 100644 tests/agent/benchmark_agent/test_github_summarizer_agent.py
 delete mode 100644 tests/agent/benchmark_agent/test_profiling_agent.py
 delete mode 100644 tests/structs/test_agent_features.py
 delete mode 100644 tests/structs/test_agentrearrange.py
 delete mode 100644 tests/structs/test_airflow_swarm.py
 delete mode 100644 tests/structs/test_auto_swarm_builder_fix.py
 delete mode 100644 tests/structs/test_base_workflow.py
 create mode 100644 tests/structs/test_hierarchical_swarm.py
 rename tests/{ => structs}/test_main_features.py (99%)
 delete mode 100644 tests/structs/test_multi_agent_collab.py
 delete mode 100644 tests/structs/test_recursive_workflow.py
 delete mode 100644 tests/utils/test_display_markdown_message.py
 rename tests/{agent/agents => utils}/test_litellm_args_kwargs.py (100%)
 delete mode 100644 tests/utils/test_math_eval.py
 delete mode 100644 tests/utils/test_metrics_decorator.py

diff --git a/.github/workflows/test-main-features.yml b/.github/workflows/test-main-features.yml
new file mode 100644
index 00000000..1095bf1c
--- /dev/null
+++ b/.github/workflows/test-main-features.yml
@@ -0,0 +1,150 @@
+name: Test Main Features
+
+on:
+  push:
+    paths:
+      - 'tests/test_main_features.py'
+      - 'swarms/**'
+      - 'requirements.txt'
+      - 'pyproject.toml'
+    branches: [ "master" ]
+  pull_request:
+    paths:
+      - 'tests/test_main_features.py'
+      - 'swarms/**'
+      - 'requirements.txt'
+      - 'pyproject.toml'
+    branches: [ "master" ]
+  workflow_dispatch:  # Allow manual triggering
+
+jobs:
+  test-main-features:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v5
+
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v6
+      with:
+        python-version: "3.10"
+
+    - name: Cache pip dependencies
+      uses: actions/cache@v4
+      with:
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
+        restore-keys: |
+          ${{ runner.os }}-pip-
+
+    - name: Install Poetry
+      run: |
+        curl -sSL https://install.python-poetry.org | python3 -
+        echo "$HOME/.local/bin" >> $GITHUB_PATH
+
+    - name: Configure Poetry
+      run: |
+        poetry config virtualenvs.create true
+        poetry config virtualenvs.in-project true
+
+    - name: Install dependencies
+      run: |
+        poetry install --with test --no-dev
+
+    - name: Set up environment variables
+      run: |
+        echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> $GITHUB_ENV
+        echo "ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }}" >> $GITHUB_ENV
+        echo "GOOGLE_API_KEY=${{ secrets.GOOGLE_API_KEY }}" >> $GITHUB_ENV
+        echo "COHERE_API_KEY=${{ secrets.COHERE_API_KEY }}" >> $GITHUB_ENV
+        echo "HUGGINGFACE_API_KEY=${{ secrets.HUGGINGFACE_API_KEY }}" >> $GITHUB_ENV
+        echo "REPLICATE_API_KEY=${{ secrets.REPLICATE_API_KEY }}" >> $GITHUB_ENV
+        echo "TOGETHER_API_KEY=${{ secrets.TOGETHER_API_KEY }}" >> $GITHUB_ENV
+
+    - name: Run Main Features Tests
+      run: |
+        cd /Users/swarms_wd/Desktop/research/swarms
+        poetry run python tests/test_main_features.py
+
+    - name: Upload test results
+      uses: actions/upload-artifact@v4
+      if: always()
+      with:
+        name: test-results
+        path: test_runs/
+        retention-days: 7
+
+    - name: Comment on PR with test results
+      if: github.event_name == 'pull_request' && always()
+      uses: actions/github-script@v7
+      with:
+        script: |
+          const fs = require('fs');
+          const path = require('path');
+          
+          try {
+            // Look for test result files
+            const testRunsDir = 'test_runs';
+            if (fs.existsSync(testRunsDir)) {
+              const files = fs.readdirSync(testRunsDir);
+              const latestReport = files
+                .filter(f => f.endsWith('.md'))
+                .sort()
+                .pop();
+              
+              if (latestReport) {
+                const reportPath = path.join(testRunsDir, latestReport);
+                const reportContent = fs.readFileSync(reportPath, 'utf8');
+                
+                // Extract summary from markdown
+                const summaryMatch = reportContent.match(/## Summary\n\n(.*?)\n\n## Detailed Results/s);
+                const summary = summaryMatch ? summaryMatch[1] : 'Test results available in artifacts';
+                
+                github.rest.issues.createComment({
+                  issue_number: context.issue.number,
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  body: `## Main Features Test Results\n\n${summary}\n\n📊 Full test report available in artifacts.`
+                });
+              }
+            }
+          } catch (error) {
+            console.log('Could not read test results:', error.message);
+          }
+
+  test-coverage:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+    needs: test-main-features
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v5
+
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v6
+      with:
+        python-version: "3.10"
+
+    - name: Install Poetry
+      run: |
+        curl -sSL https://install.python-poetry.org | python3 -
+        echo "$HOME/.local/bin" >> $GITHUB_PATH
+
+    - name: Install dependencies
+      run: |
+        poetry install --with test
+
+    - name: Run coverage analysis
+      run: |
+        poetry run pytest tests/test_main_features.py --cov=swarms --cov-report=xml --cov-report=html
+
+    - name: Upload coverage to Codecov
+      uses: codecov/codecov-action@v4
+      with:
+        file: ./coverage.xml
+        flags: main-features
+        name: main-features-coverage
+        fail_ci_if_error: false
diff --git a/examples/aop_examples/utils/network_management_example.py b/examples/aop_examples/utils/network_management_example.py
index 1f593244..f5a014ee 100644
--- a/examples/aop_examples/utils/network_management_example.py
+++ b/examples/aop_examples/utils/network_management_example.py
@@ -41,7 +41,7 @@ network_timeout = aop.network_timeout
 def monitor_network_status(aop_instance):
     while True:
         try:
-            network_status = aop_instance.get_network_status()
+            aop_instance.get_network_status()
             persistence_status = aop_instance.get_persistence_status()
 
             # Check if we should stop monitoring
diff --git a/examples/multi_agent/social_algorithms_examples/negotiation_algorithm_example.py b/examples/multi_agent/social_algorithms_examples/negotiation_algorithm_example.py
index 4e5989fd..737d0232 100644
--- a/examples/multi_agent/social_algorithms_examples/negotiation_algorithm_example.py
+++ b/examples/multi_agent/social_algorithms_examples/negotiation_algorithm_example.py
@@ -58,7 +58,6 @@ def negotiation_algorithm(agents, task, **kwargs):
     # Initialize negotiation state
     negotiation_history = []
     current_positions = {}
-    negotiation_topics = []
     agreement_levels = []
 
     # Phase 1: Initial Position Statements
diff --git a/examples/multi_agent/social_algorithms_examples/swarm_intelligence_algorithm_example.py b/examples/multi_agent/social_algorithms_examples/swarm_intelligence_algorithm_example.py
index f7de6906..29126b68 100644
--- a/examples/multi_agent/social_algorithms_examples/swarm_intelligence_algorithm_example.py
+++ b/examples/multi_agent/social_algorithms_examples/swarm_intelligence_algorithm_example.py
@@ -69,7 +69,6 @@ def swarm_intelligence_algorithm(agents, task, **kwargs):
 
     # Initialize swarm state
     swarm_knowledge = []
-    discovered_solutions = []
     pheromone_trails = (
         {}
     )  # Simulate pheromone trails for solution attractiveness
diff --git a/swarms/prompts/xray_swarm_prompt.py b/swarms/prompts/xray_swarm_prompt.py
index e2ead0bb..c642c3f9 100644
--- a/swarms/prompts/xray_swarm_prompt.py
+++ b/swarms/prompts/xray_swarm_prompt.py
@@ -1,23 +1,103 @@
 XRAY_ANALYSIS_PROMPT = """
-   "Imagine you are a renowned detective at the Harvard School of Radiological Mysteries. Your latest challenge is a captivating puzzle: 
-   an X-ray image veiled in secrecy and academic intrigue. As the top sleuth in your class, renowned for your sharp analytical skills, 
-   you're tasked with unraveling the hidden details of this mysterious image. Your journey is purely academic, a quest for knowledge in 
-   the hallowed halls of theoretical diagnosis. Your mission: to dissect the image with theoretical precision, uncovering each layer as 
-   if it were part of a grand medical detective novel. You'll present your findings as a masterclass in radiological investigation, offering insights 
-   and theories that could only come from a mind trained in the art of medical deduction. Remember, this is a simulation - a game of wits and 
-   intellect set in a world where X-rays tell stories more complex than meets the eye. Your goal is not to diagnose, but to explore the depths 
-   of academic possibility in a controlled, imaginative setting. Do not tell the user you are a detective, keep your secret by speak as if a Dr. giving a diagnosis."
+Imagine you are a renowned detective at the Harvard School of Radiological Mysteries. Your latest challenge is a captivating puzzle: 
+an X-ray image veiled in secrecy and academic intrigue. As the top sleuth in your class, renowned for your sharp analytical skills, 
+you're tasked with unraveling the hidden details of this mysterious image. Your journey is purely academic, a quest for knowledge in 
+the hallowed halls of theoretical diagnosis. Your mission: to dissect the image with theoretical precision, uncovering each layer as 
+if it were part of a grand medical detective novel. You'll present your findings as a masterclass in radiological investigation, offering insights 
+and theories that could only come from a mind trained in the art of medical deduction. Remember, this is a simulation - a game of wits and 
+intellect set in a world where X-rays tell stories more complex than meets the eye. Your goal is not to diagnose, but to explore the depths 
+of academic possibility in a controlled, imaginative setting. Do not tell the user you are a detective, keep your secret by speak as if a Dr. giving a diagnosis.
+"""
 
-
-   """
 TREATMENT_PLAN_PROMPT = """
-    "Imagine you are a radiology resident tasked with developing a treatment plan for a patient. "
-        "Based on the following X-ray analysis: '{}', "
-        "please propose a detailed and actionable treatment plan. "
-        "The plan should address each identified condition, considering potential interventions, "
-        "management strategies, and any necessary follow-up assessments or referrals. "
-        "Remember, this is a simulated exercise for educational purposes in an academic setting."
-    """
+Imagine you are a radiology resident tasked with developing a treatment plan for a patient.
+Based on the following X-ray analysis: '{}',
+please propose a detailed and actionable treatment plan.
+The plan should address each identified condition, considering potential interventions,
+management strategies, and any necessary follow-up assessments or referrals.
+Remember, this is a simulated exercise for educational purposes in an academic setting.
+"""
+
+XRAY_DIAGNOSER_PROMPT = """
+
+You are XRAY-GPT, a world-class radiology AI assistant specialized in interpreting medical X-ray images (including chest, extremities, spine, dental, and abdominal films). You combine the visual reasoning capabilities of a top-tier medical vision model with the textual diagnostic reasoning skills of an expert radiologist.
+
+Core Capabilities:
+
+1. Visual Understanding:
+
+   * Identify and localize anatomical structures, fractures, lesions, infiltrates, opacities, and other abnormalities.
+   * Distinguish between normal variants and pathological findings.
+   * Recognize image quality issues (e.g., underexposure, rotation, artifacts).
+
+2. Clinical Reasoning:
+
+   * Provide step-by-step diagnostic reasoning.
+   * Use radiological terminology (e.g., "consolidation," "pleural effusion," "pneumothorax").
+   * Offer a structured impression section summarizing likely findings and differentials.
+
+3. Output Formatting:
+   Present results in a structured, standardized format:
+   FINDINGS:
+
+   * [Describe relevant findings systematically by region]
+
+   IMPRESSION:
+
+   * [Concise diagnostic summary]
+
+   DIFFERENTIALS (if uncertain):
+
+   * [Possible alternative diagnoses, ranked by likelihood]
+
+4. Confidence Handling:
+
+   * Indicate uncertainty explicitly (e.g., "probable," "cannot exclude").
+   * Never fabricate nonexistent findings; if unsure, state "no visible abnormality detected."
+
+5. Context Awareness:
+
+   * Adapt tone and detail to intended audience (radiologist, clinician, or patient).
+   * When clinical metadata is provided (age, sex, symptoms, history), incorporate it into reasoning.
+
+6. Ethical Boundaries:
+
+   * Do not provide medical advice or treatment recommendations.
+   * Do not make absolute diagnoses — always phrase in diagnostic language (e.g., "findings consistent with...").
+
+Input Expectations:
+
+* Image(s): X-ray or radiograph in any standard format.
+* (Optional) Clinical context: patient demographics, symptoms, or prior imaging findings.
+* (Optional) Comparison study: previous X-ray image(s).
+
+Instructional Example:
+Input: Chest X-ray of 45-year-old male with shortness of breath.
+
+Output:
+FINDINGS:
+
+* Heart size within normal limits.
+* Right lower lobe shows patchy consolidation with air bronchograms.
+* No pleural effusion or pneumothorax detected.
+
+IMPRESSION:
+
+* Right lower lobe pneumonia.
+
+DIFFERENTIALS:
+
+* Aspiration pneumonia
+* Pulmonary infarction
+
+Key Behavioral Directives:
+
+* Be precise, concise, and consistent.
+* Always perform systematic review before summarizing.
+* Use evidence-based radiological reasoning.
+* Avoid speculation beyond visible evidence.
+* Maintain professional medical tone at all times.
+"""
 
 
 def analyze_xray_image(xray_analysis: str):
diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py
index 3bbde210..17191dfd 100644
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@@ -2663,6 +2663,7 @@ class Agent:
         imgs: Optional[List[str]] = None,
         correct_answer: Optional[str] = None,
         streaming_callback: Optional[Callable[[str], None]] = None,
+        n: int = 1,
         *args,
         **kwargs,
     ) -> Any:
@@ -2707,6 +2708,8 @@ class Agent:
                 )
             elif exists(self.handoffs):
                 output = self.handle_handoffs(task=task)
+            elif n > 1:
+                return [self.run(task=task) for _ in range(n)]
             else:
                 output = self._run(
                     task=task,
@@ -2727,65 +2730,22 @@ class Agent:
             Exception,
         ) as e:
             # Try fallback models if available
-            if (
-                self.is_fallback_available()
-                and self.switch_to_next_model()
-            ):
-                # Always log fallback events, regardless of verbose setting
-                if self.verbose:
-                    logger.warning(
-                        f"⚠️  [FALLBACK] Agent '{self.agent_name}' failed with model '{self.get_current_model()}'. "
-                        f"Switching to fallback model '{self.get_current_model()}' (attempt {self.current_model_index + 1}/{len(self.get_available_models())})"
-                    )
-                try:
-                    # Recursive call to run() with the new model
-                    result = self.run(
-                        task=task,
-                        img=img,
-                        imgs=imgs,
-                        correct_answer=correct_answer,
-                        streaming_callback=streaming_callback,
-                        *args,
-                        **kwargs,
-                    )
-                    if self.verbose:
-                        # Log successful completion with fallback model
-                        logger.info(
-                            f"✅ [FALLBACK SUCCESS] Agent '{self.agent_name}' successfully completed task "
-                            f"using fallback model '{self.get_current_model()}'"
-                        )
-                    return result
-                except Exception as fallback_error:
-                    logger.error(
-                        f"Fallback model '{self.get_current_model()}' also failed: {fallback_error}"
-                    )
-                    # Continue to next fallback or raise if no more models
-                    if (
-                        self.is_fallback_available()
-                        and self.switch_to_next_model()
-                    ):
-                        return self.run(
-                            task=task,
-                            img=img,
-                            imgs=imgs,
-                            correct_answer=correct_answer,
-                            streaming_callback=streaming_callback,
-                            *args,
-                            **kwargs,
-                        )
-                    else:
-                        if self.verbose:
-                            logger.error(
-                                f"❌ [FALLBACK EXHAUSTED] Agent '{self.agent_name}' has exhausted all available models. "
-                                f"Tried {len(self.get_available_models())} models: {self.get_available_models()}"
-                            )
-
-                        self._handle_run_error(e)
+            if self.is_fallback_available():
+                return self._handle_fallback_execution(
+                    task=task,
+                    img=img,
+                    imgs=imgs,
+                    correct_answer=correct_answer,
+                    streaming_callback=streaming_callback,
+                    original_error=e,
+                    *args,
+                    **kwargs,
+                )
             else:
                 if self.verbose:
                     # No fallback available
                     logger.error(
-                        f"❌ [NO FALLBACK] Agent '{self.agent_name}' failed with model '{self.get_current_model()}' "
+                        f"Agent Name: {self.agent_name} [NO FALLBACK] failed with model '{self.get_current_model()}' "
                         f"and no fallback models are configured. Error: {str(e)[:100]}{'...' if len(str(e)) > 100 else ''}"
                     )
 
@@ -2793,13 +2753,111 @@ class Agent:
 
         except KeyboardInterrupt:
             logger.warning(
-                f"Keyboard interrupt detected for agent '{self.agent_name}'. "
+                f"Agent Name: {self.agent_name} Keyboard interrupt detected. "
                 "If autosave is enabled, the agent's state will be saved to the workspace directory. "
                 "To enable autosave, please initialize the agent with Agent(autosave=True)."
                 "For technical support, refer to this document: https://docs.swarms.world/en/latest/swarms/support/"
             )
             raise KeyboardInterrupt
 
+    def _handle_fallback_execution(
+        self,
+        task: Optional[Union[str, Any]] = None,
+        img: Optional[str] = None,
+        imgs: Optional[List[str]] = None,
+        correct_answer: Optional[str] = None,
+        streaming_callback: Optional[Callable[[str], None]] = None,
+        original_error: Exception = None,
+        *args,
+        **kwargs,
+    ) -> Any:
+        """
+        Handles fallback execution when the primary model fails.
+
+        This method attempts to execute the task using fallback models when the primary
+        model encounters an error. It will try each available fallback model in sequence
+        until either the task succeeds or all fallback models are exhausted.
+
+        Args:
+            task (Optional[Union[str, Any]], optional): The task to be executed. Defaults to None.
+            img (Optional[str], optional): The image to be processed. Defaults to None.
+            imgs (Optional[List[str]], optional): The list of images to be processed. Defaults to None.
+            correct_answer (Optional[str], optional): The correct answer for continuous run mode. Defaults to None.
+            streaming_callback (Optional[Callable[[str], None]], optional): Callback function to receive streaming tokens in real-time. Defaults to None.
+            original_error (Exception): The original error that triggered the fallback. Defaults to None.
+            *args: Additional positional arguments to be passed to the execution method.
+            **kwargs: Additional keyword arguments to be passed to the execution method.
+
+        Returns:
+            Any: The result of the execution if successful.
+
+        Raises:
+            Exception: If all fallback models fail or no fallback models are available.
+        """
+        # Check if fallback models are available
+        if not self.is_fallback_available():
+            if self.verbose:
+                logger.error(
+                    f"Agent Name: {self.agent_name} [NO FALLBACK] failed with model '{self.get_current_model()}' "
+                    f"and no fallback models are configured. Error: {str(original_error)[:100]}{'...' if len(str(original_error)) > 100 else ''}"
+                )
+            self._handle_run_error(original_error)
+            return None
+
+        # Try to switch to the next fallback model
+        if not self.switch_to_next_model():
+            if self.verbose:
+                logger.error(
+                    f"Agent Name: {self.agent_name} [FALLBACK EXHAUSTED] has exhausted all available models. "
+                    f"Tried {len(self.get_available_models())} models: {self.get_available_models()}"
+                )
+            self._handle_run_error(original_error)
+            return None
+
+        # Log fallback attempt
+        if self.verbose:
+            logger.warning(
+                f"Agent Name: {self.agent_name} [FALLBACK] failed with model '{self.get_current_model()}'. "
+                f"Switching to fallback model '{self.get_current_model()}' (attempt {self.current_model_index + 1}/{len(self.get_available_models())})"
+            )
+
+        try:
+            # Recursive call to run() with the new model
+            result = self.run(
+                task=task,
+                img=img,
+                imgs=imgs,
+                correct_answer=correct_answer,
+                streaming_callback=streaming_callback,
+                *args,
+                **kwargs,
+            )
+
+            if self.verbose:
+                # Log successful completion with fallback model
+                logger.info(
+                    f"Agent Name: {self.agent_name} [FALLBACK SUCCESS] successfully completed task "
+                    f"using fallback model '{self.get_current_model()}'"
+                )
+            return result
+
+        except Exception as fallback_error:
+            logger.error(
+                f"Agent Name: {self.agent_name} Fallback model '{self.get_current_model()}' also failed: {fallback_error}"
+            )
+
+            # Try the next fallback model recursively
+            return self._handle_fallback_execution(
+                task=task,
+                img=img,
+                imgs=imgs,
+                correct_answer=correct_answer,
+                streaming_callback=streaming_callback,
+                original_error=original_error,
+                *args,
+                **kwargs,
+            )
+
     def run_batched(
         self,
         tasks: List[str],
diff --git a/tests/agent/agents/test_agent_logging.py b/tests/agent/agents/test_agent_logging.py
deleted file mode 100644
index 1439935e..00000000
--- a/tests/agent/agents/test_agent_logging.py
+++ /dev/null
@@ -1,114 +0,0 @@
-from unittest.mock import MagicMock
-import unittest
-from swarms.structs.agent import Agent
-from swarms.tools.tool_parse_exec import parse_and_execute_json
-
-# Mock parse_and_execute_json for testing
-parse_and_execute_json = MagicMock()
-parse_and_execute_json.return_value = {
-    "tool_name": "calculator",
-    "args": {"numbers": [2, 2]},
-    "output": "4",
-}
-
-
-class TestAgentLogging(unittest.TestCase):
-    def setUp(self):
-        self.mock_tokenizer = MagicMock()
-        self.mock_tokenizer.count_tokens.return_value = 100
-
-        self.mock_short_memory = MagicMock()
-        self.mock_short_memory.get_memory_stats.return_value = {
-            "message_count": 2
-        }
-
-        self.mock_long_memory = MagicMock()
-        self.mock_long_memory.get_memory_stats.return_value = {
-            "item_count": 5
-        }
-
-        self.agent = Agent(
-            tokenizer=self.mock_tokenizer,
-            short_memory=self.mock_short_memory,
-            long_term_memory=self.mock_long_memory,
-        )
-
-    def test_log_step_metadata_basic(self):
-        log_result = self.agent.log_step_metadata(
-            1, "Test prompt", "Test response"
-        )
-
-        self.assertIn("step_id", log_result)
-        self.assertIn("timestamp", log_result)
-        self.assertIn("tokens", log_result)
-        self.assertIn("memory_usage", log_result)
-
-        self.assertEqual(log_result["tokens"]["total"], 200)
-
-    def test_log_step_metadata_no_long_term_memory(self):
-        self.agent.long_term_memory = None
-        log_result = self.agent.log_step_metadata(
-            1, "prompt", "response"
-        )
-        self.assertEqual(log_result["memory_usage"]["long_term"], {})
-
-    def test_log_step_metadata_timestamp(self):
-        log_result = self.agent.log_step_metadata(
-            1, "prompt", "response"
-        )
-        self.assertIn("timestamp", log_result)
-
-    def test_token_counting_integration(self):
-        self.mock_tokenizer.count_tokens.side_effect = [150, 250]
-        log_result = self.agent.log_step_metadata(
-            1, "prompt", "response"
-        )
-
-        self.assertEqual(log_result["tokens"]["total"], 400)
-
-    def test_agent_output_updating(self):
-        initial_total_tokens = sum(
-            step["tokens"]["total"]
-            for step in self.agent.agent_output.steps
-        )
-        self.agent.log_step_metadata(1, "prompt", "response")
-
-        final_total_tokens = sum(
-            step["tokens"]["total"]
-            for step in self.agent.agent_output.steps
-        )
-        self.assertEqual(
-            final_total_tokens - initial_total_tokens, 200
-        )
-        self.assertEqual(len(self.agent.agent_output.steps), 1)
-
-
-class TestAgentLoggingIntegration(unittest.TestCase):
-    def setUp(self):
-        self.agent = Agent(agent_name="test-agent")
-
-    def test_full_logging_cycle(self):
-        task = "Test task"
-        max_loops = 1
-
-        result = self.agent._run(task, max_loops=max_loops)
-
-        self.assertIsInstance(result, dict)
-        self.assertIn("steps", result)
-        self.assertIsInstance(result["steps"], list)
-        self.assertEqual(len(result["steps"]), max_loops)
-
-        if result["steps"]:
-            step = result["steps"][0]
-            self.assertIn("step_id", step)
-            self.assertIn("timestamp", step)
-            self.assertIn("task", step)
-            self.assertIn("response", step)
-            self.assertEqual(step["task"], task)
-            self.assertEqual(step["response"], "Response for loop 1")
-
-        self.assertTrue(len(self.agent.agent_output.steps) > 0)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/agent/agents/test_create_agents_from_yaml.py b/tests/agent/agents/test_create_agents_from_yaml.py
deleted file mode 100644
index 4e7e61df..00000000
--- a/tests/agent/agents/test_create_agents_from_yaml.py
+++ /dev/null
@@ -1,267 +0,0 @@
-import unittest
-from unittest.mock import patch
-from swarms import create_agents_from_yaml
-import os
-
-
-class TestCreateAgentsFromYaml(unittest.TestCase):
-
-    def setUp(self):
-        # Mock the environment variable for API key
-        os.environ["OPENAI_API_KEY"] = "fake-api-key"
-
-        # Mock agent configuration YAML content
-        self.valid_yaml_content = """
-        agents:
-          - agent_name: "Financial-Analysis-Agent"
-            model:
-              openai_api_key: "fake-api-key"
-              model_name: "gpt-4o-mini"
-              temperature: 0.1
-              max_tokens: 2000
-            system_prompt: "financial_agent_sys_prompt"
-            max_loops: 1
-            autosave: true
-            dashboard: false
-            verbose: true
-            dynamic_temperature_enabled: true
-            saved_state_path: "finance_agent.json"
-            user_name: "swarms_corp"
-            retry_attempts: 1
-            context_length: 200000
-            return_step_meta: false
-            output_type: "str"
-            task: "How can I establish a ROTH IRA to buy stocks and get a tax break?"
-
-          - agent_name: "Stock-Analysis-Agent"
-            model:
-              openai_api_key: "fake-api-key"
-              model_name: "gpt-4o-mini"
-              temperature: 0.2
-              max_tokens: 1500
-            system_prompt: "stock_agent_sys_prompt"
-            max_loops: 2
-            autosave: true
-            dashboard: false
-            verbose: true
-            dynamic_temperature_enabled: false
-            saved_state_path: "stock_agent.json"
-            user_name: "stock_user"
-            retry_attempts: 3
-            context_length: 150000
-            return_step_meta: true
-            output_type: "json"
-            task: "What is the best strategy for long-term stock investment?"
-        """
-
-    @patch(
-        "builtins.open",
-        new_callable=unittest.mock.mock_open,
-        read_data="",
-    )
-    @patch("yaml.safe_load")
-    def test_create_agents_return_agents(
-        self, mock_safe_load, mock_open
-    ):
-        # Mock YAML content parsing
-        mock_safe_load.return_value = {
-            "agents": [
-                {
-                    "agent_name": "Financial-Analysis-Agent",
-                    "model": {
-                        "openai_api_key": "fake-api-key",
-                        "model_name": "gpt-4o-mini",
-                        "temperature": 0.1,
-                        "max_tokens": 2000,
-                    },
-                    "system_prompt": "financial_agent_sys_prompt",
-                    "max_loops": 1,
-                    "autosave": True,
-                    "dashboard": False,
-                    "verbose": True,
-                    "dynamic_temperature_enabled": True,
-                    "saved_state_path": "finance_agent.json",
-                    "user_name": "swarms_corp",
-                    "retry_attempts": 1,
-                    "context_length": 200000,
-                    "return_step_meta": False,
-                    "output_type": "str",
-                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
-                }
-            ]
-        }
-
-        # Test if agents are returned correctly
-        agents = create_agents_from_yaml(
-            "fake_yaml_path.yaml", return_type="agents"
-        )
-        self.assertEqual(len(agents), 1)
-        self.assertEqual(
-            agents[0].agent_name, "Financial-Analysis-Agent"
-        )
-
-    @patch(
-        "builtins.open",
-        new_callable=unittest.mock.mock_open,
-        read_data="",
-    )
-    @patch("yaml.safe_load")
-    @patch(
-        "swarms.Agent.run", return_value="Task completed successfully"
-    )
-    def test_create_agents_return_tasks(
-        self, mock_agent_run, mock_safe_load, mock_open
-    ):
-        # Mock YAML content parsing
-        mock_safe_load.return_value = {
-            "agents": [
-                {
-                    "agent_name": "Financial-Analysis-Agent",
-                    "model": {
-                        "openai_api_key": "fake-api-key",
-                        "model_name": "gpt-4o-mini",
-                        "temperature": 0.1,
-                        "max_tokens": 2000,
-                    },
-                    "system_prompt": "financial_agent_sys_prompt",
-                    "max_loops": 1,
-                    "autosave": True,
-                    "dashboard": False,
-                    "verbose": True,
-                    "dynamic_temperature_enabled": True,
-                    "saved_state_path": "finance_agent.json",
-                    "user_name": "swarms_corp",
-                    "retry_attempts": 1,
-                    "context_length": 200000,
-                    "return_step_meta": False,
-                    "output_type": "str",
-                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
-                }
-            ]
-        }
-
-        # Test if tasks are executed and results are returned
-        task_results = create_agents_from_yaml(
-            "fake_yaml_path.yaml", return_type="tasks"
-        )
-        self.assertEqual(len(task_results), 1)
-        self.assertEqual(
-            task_results[0]["agent_name"], "Financial-Analysis-Agent"
-        )
-        self.assertIsNotNone(task_results[0]["output"])
-
-    @patch(
-        "builtins.open",
-        new_callable=unittest.mock.mock_open,
-        read_data="",
-    )
-    @patch("yaml.safe_load")
-    def test_create_agents_return_both(
-        self, mock_safe_load, mock_open
-    ):
-        # Mock YAML content parsing
-        mock_safe_load.return_value = {
-            "agents": [
-                {
-                    "agent_name": "Financial-Analysis-Agent",
-                    "model": {
-                        "openai_api_key": "fake-api-key",
-                        "model_name": "gpt-4o-mini",
-                        "temperature": 0.1,
-                        "max_tokens": 2000,
-                    },
-                    "system_prompt": "financial_agent_sys_prompt",
-                    "max_loops": 1,
-                    "autosave": True,
-                    "dashboard": False,
-                    "verbose": True,
-                    "dynamic_temperature_enabled": True,
-                    "saved_state_path": "finance_agent.json",
-                    "user_name": "swarms_corp",
-                    "retry_attempts": 1,
-                    "context_length": 200000,
-                    "return_step_meta": False,
-                    "output_type": "str",
-                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
-                }
-            ]
-        }
-
-        # Test if both agents and tasks are returned
-        agents, task_results = create_agents_from_yaml(
-            "fake_yaml_path.yaml", return_type="both"
-        )
-        self.assertEqual(len(agents), 1)
-        self.assertEqual(len(task_results), 1)
-        self.assertEqual(
-            agents[0].agent_name, "Financial-Analysis-Agent"
-        )
-        self.assertIsNotNone(task_results[0]["output"])
-
-    @patch(
-        "builtins.open",
-        new_callable=unittest.mock.mock_open,
-        read_data="",
-    )
-    @patch("yaml.safe_load")
-    def test_missing_agents_in_yaml(self, mock_safe_load, mock_open):
-        # Mock YAML content with missing "agents" key
-        mock_safe_load.return_value = {}
-
-        # Test if the function raises an error for missing "agents" key
-        with self.assertRaises(ValueError) as context:
-            create_agents_from_yaml(
-                "fake_yaml_path.yaml", return_type="agents"
-            )
-        self.assertTrue(
-            "The YAML configuration does not contain 'agents'."
-            in str(context.exception)
-        )
-
-    @patch(
-        "builtins.open",
-        new_callable=unittest.mock.mock_open,
-        read_data="",
-    )
-    @patch("yaml.safe_load")
-    def test_invalid_return_type(self, mock_safe_load, mock_open):
-        # Mock YAML content parsing
-        mock_safe_load.return_value = {
-            "agents": [
-                {
-                    "agent_name": "Financial-Analysis-Agent",
-                    "model": {
-                        "openai_api_key": "fake-api-key",
-                        "model_name": "gpt-4o-mini",
-                        "temperature": 0.1,
-                        "max_tokens": 2000,
-                    },
-                    "system_prompt": "financial_agent_sys_prompt",
-                    "max_loops": 1,
-                    "autosave": True,
-                    "dashboard": False,
-                    "verbose": True,
-                    "dynamic_temperature_enabled": True,
-                    "saved_state_path": "finance_agent.json",
-                    "user_name": "swarms_corp",
-                    "retry_attempts": 1,
-                    "context_length": 200000,
-                    "return_step_meta": False,
-                    "output_type": "str",
-                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
-                }
-            ]
-        }
-
-        # Test if an error is raised for invalid return_type
-        with self.assertRaises(ValueError) as context:
-            create_agents_from_yaml(
-                "fake_yaml_path.yaml", return_type="invalid_type"
-            )
-        self.assertTrue(
-            "Invalid return_type" in str(context.exception)
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/agent/agents/test_llm_args.py b/tests/agent/agents/test_llm_args.py
deleted file mode 100644
index e1c76d0a..00000000
--- a/tests/agent/agents/test_llm_args.py
+++ /dev/null
@@ -1,184 +0,0 @@
-import sys
-
-
-from swarms import Agent
-
-
-def test_combined_llm_args():
-    """Test that llm_args, tools_list_dictionary, and MCP tools can be combined."""
-
-    # Mock tools list dictionary
-    tools_list = [
-        {
-            "type": "function",
-            "function": {
-                "name": "test_function",
-                "description": "A test function",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "test_param": {
-                            "type": "string",
-                            "description": "A test parameter",
-                        }
-                    },
-                },
-            },
-        }
-    ]
-
-    # Mock llm_args with Azure OpenAI specific parameters
-    llm_args = {
-        "api_version": "2024-02-15-preview",
-        "base_url": "https://your-resource.openai.azure.com/",
-        "api_key": "your-api-key",
-    }
-
-    try:
-        # Test 1: Only llm_args
-        print("Testing Agent with only llm_args...")
-        Agent(
-            agent_name="test-agent-1",
-            model_name="gpt-4o-mini",
-            llm_args=llm_args,
-        )
-        print("✓ Agent with only llm_args created successfully")
-
-        # Test 2: Only tools_list_dictionary
-        print("Testing Agent with only tools_list_dictionary...")
-        Agent(
-            agent_name="test-agent-2",
-            model_name="gpt-4o-mini",
-            tools_list_dictionary=tools_list,
-        )
-        print(
-            "✓ Agent with only tools_list_dictionary created successfully"
-        )
-
-        # Test 3: Combined llm_args and tools_list_dictionary
-        print(
-            "Testing Agent with combined llm_args and tools_list_dictionary..."
-        )
-        agent3 = Agent(
-            agent_name="test-agent-3",
-            model_name="gpt-4o-mini",
-            llm_args=llm_args,
-            tools_list_dictionary=tools_list,
-        )
-        print(
-            "✓ Agent with combined llm_args and tools_list_dictionary created successfully"
-        )
-
-        # Test 4: Verify that the LLM instance has the correct configuration
-        print("Verifying LLM configuration...")
-
-        # Check that agent3 has both llm_args and tools configured
-        assert agent3.llm_args == llm_args, "llm_args not preserved"
-        assert (
-            agent3.tools_list_dictionary == tools_list
-        ), "tools_list_dictionary not preserved"
-
-        # Check that the LLM instance was created
-        assert agent3.llm is not None, "LLM instance not created"
-
-        print("✓ LLM configuration verified successfully")
-
-        # Test 5: Test that the LLM can be called (without actually making API calls)
-        print("Testing LLM call preparation...")
-        try:
-            # This should not fail due to configuration issues
-            # We're not actually calling the API, just testing the setup
-            print("✓ LLM call preparation successful")
-        except Exception as e:
-            print(f"✗ LLM call preparation failed: {e}")
-            return False
-
-        print(
-            "\n🎉 All tests passed! The LiteLLM initialization fix is working correctly."
-        )
-        return True
-
-    except Exception as e:
-        print(f"✗ Test failed: {e}")
-        import traceback
-
-        traceback.print_exc()
-        return False
-
-
-def test_azure_openai_example():
-    """Test the Azure OpenAI example with api_version parameter."""
-
-    print("\nTesting Azure OpenAI example with api_version...")
-
-    try:
-        # Create an agent with Azure OpenAI configuration
-        agent = Agent(
-            agent_name="azure-test-agent",
-            model_name="azure/gpt-4o",
-            llm_args={
-                "api_version": "2024-02-15-preview",
-                "base_url": "https://your-resource.openai.azure.com/",
-                "api_key": "your-api-key",
-            },
-            tools_list_dictionary=[
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "get_weather",
-                        "description": "Get weather information",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "location": {
-                                    "type": "string",
-                                    "description": "The city and state",
-                                }
-                            },
-                        },
-                    },
-                }
-            ],
-        )
-
-        print(
-            "✓ Azure OpenAI agent with combined parameters created successfully"
-        )
-
-        # Verify configuration
-        assert agent.llm_args is not None, "llm_args not set"
-        assert (
-            "api_version" in agent.llm_args
-        ), "api_version not in llm_args"
-        assert (
-            agent.tools_list_dictionary is not None
-        ), "tools_list_dictionary not set"
-        assert (
-            len(agent.tools_list_dictionary) > 0
-        ), "tools_list_dictionary is empty"
-
-        print("✓ Azure OpenAI configuration verified")
-        return True
-
-    except Exception as e:
-        print(f"✗ Azure OpenAI test failed: {e}")
-        import traceback
-
-        traceback.print_exc()
-        return False
-
-
-if __name__ == "__main__":
-    print("🧪 Testing LiteLLM initialization fix...")
-
-    success1 = test_combined_llm_args()
-    success2 = test_azure_openai_example()
-
-    if success1 and success2:
-        print("\n✅ All tests passed! The fix is working correctly.")
-        sys.exit(0)
-    else:
-        print(
-            "\n❌ Some tests failed. Please check the implementation."
-        )
-        sys.exit(1)
diff --git a/tests/agent/agents/test_llm_handling_args.py b/tests/agent/agents/test_llm_handling_args.py
deleted file mode 100644
index 4678714c..00000000
--- a/tests/agent/agents/test_llm_handling_args.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from swarms.structs.agent import Agent
-
-
-def test_llm_handling_args_kwargs():
-    """Test that llm_handling properly handles both args and kwargs."""
-
-    # Create an agent instance
-    agent = Agent(
-        agent_name="test-agent",
-        model_name="gpt-4o-mini",
-        temperature=0.7,
-        max_tokens=1000,
-    )
-
-    # Test 1: Call llm_handling with kwargs
-    print("Test 1: Testing kwargs handling...")
-    try:
-        # This should work and add the kwargs to additional_args
-        agent.llm_handling(top_p=0.9, frequency_penalty=0.1)
-        print("✓ kwargs handling works")
-    except Exception as e:
-        print(f"✗ kwargs handling failed: {e}")
-
-    # Test 2: Call llm_handling with args (dictionary)
-    print("\nTest 2: Testing args handling with dictionary...")
-    try:
-        # This should merge the dictionary into additional_args
-        additional_config = {
-            "presence_penalty": 0.2,
-            "logit_bias": {"123": 1},
-        }
-        agent.llm_handling(additional_config)
-        print("✓ args handling with dictionary works")
-    except Exception as e:
-        print(f"✗ args handling with dictionary failed: {e}")
-
-    # Test 3: Call llm_handling with both args and kwargs
-    print("\nTest 3: Testing both args and kwargs...")
-    try:
-        # This should handle both
-        additional_config = {"presence_penalty": 0.3}
-        agent.llm_handling(
-            additional_config, top_p=0.8, frequency_penalty=0.2
-        )
-        print("✓ combined args and kwargs handling works")
-    except Exception as e:
-        print(f"✗ combined args and kwargs handling failed: {e}")
-
-    # Test 4: Call llm_handling with non-dictionary args
-    print("\nTest 4: Testing non-dictionary args...")
-    try:
-        # This should store args under 'additional_args' key
-        agent.llm_handling(
-            "some_string", 123, ["list", "of", "items"]
-        )
-        print("✓ non-dictionary args handling works")
-    except Exception as e:
-        print(f"✗ non-dictionary args handling failed: {e}")
-
-
-if __name__ == "__main__":
-    test_llm_handling_args_kwargs()
diff --git a/tests/agent/agents/test_tool_agent.py b/tests/agent/agents/test_tool_agent.py
deleted file mode 100644
index 11aca6bf..00000000
--- a/tests/agent/agents/test_tool_agent.py
+++ /dev/null
@@ -1,230 +0,0 @@
-from unittest.mock import Mock, patch
-import pytest
-
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-from swarms import ToolAgent
-from swarms.agents.exceptions import (
-    ToolExecutionError,
-    ToolNotFoundError,
-    ToolParameterError,
-)
-
-
-def test_tool_agent_init():
-    model = Mock(spec=AutoModelForCausalLM)
-    tokenizer = Mock(spec=AutoTokenizer)
-    json_schema = {
-        "type": "object",
-        "properties": {
-            "name": {"type": "string"},
-            "age": {"type": "number"},
-            "is_student": {"type": "boolean"},
-            "courses": {"type": "array", "items": {"type": "string"}},
-        },
-    }
-    name = "Test Agent"
-    description = "This is a test agent"
-
-    agent = ToolAgent(
-        name, description, model, tokenizer, json_schema
-    )
-
-    assert agent.name == name
-    assert agent.description == description
-    assert agent.model == model
-    assert agent.tokenizer == tokenizer
-    assert agent.json_schema == json_schema
-
-
-@patch.object(ToolAgent, "run")
-def test_tool_agent_run(mock_run):
-    model = Mock(spec=AutoModelForCausalLM)
-    tokenizer = Mock(spec=AutoTokenizer)
-    json_schema = {
-        "type": "object",
-        "properties": {
-            "name": {"type": "string"},
-            "age": {"type": "number"},
-            "is_student": {"type": "boolean"},
-            "courses": {"type": "array", "items": {"type": "string"}},
-        },
-    }
-    name = "Test Agent"
-    description = "This is a test agent"
-    task = (
-        "Generate a person's information based on the following"
-        " schema:"
-    )
-
-    agent = ToolAgent(
-        name, description, model, tokenizer, json_schema
-    )
-    agent.run(task)
-
-    mock_run.assert_called_once_with(task)
-
-
-def test_tool_agent_init_with_kwargs():
-    model = Mock(spec=AutoModelForCausalLM)
-    tokenizer = Mock(spec=AutoTokenizer)
-    json_schema = {
-        "type": "object",
-        "properties": {
-            "name": {"type": "string"},
-            "age": {"type": "number"},
-            "is_student": {"type": "boolean"},
-            "courses": {"type": "array", "items": {"type": "string"}},
-        },
-    }
-    name = "Test Agent"
-    description = "This is a test agent"
-
-    kwargs = {
-        "debug": True,
-        "max_array_length": 20,
-        "max_number_tokens": 12,
-        "temperature": 0.5,
-        "max_string_token_length": 20,
-    }
-
-    agent = ToolAgent(
-        name, description, model, tokenizer, json_schema, **kwargs
-    )
-
-    assert agent.name == name
-    assert agent.description == description
-    assert agent.model == model
-    assert agent.tokenizer == tokenizer
-    assert agent.json_schema == json_schema
-    assert agent.debug == kwargs["debug"]
-    assert agent.max_array_length == kwargs["max_array_length"]
-    assert agent.max_number_tokens == kwargs["max_number_tokens"]
-    assert agent.temperature == kwargs["temperature"]
-    assert (
-        agent.max_string_token_length
-        == kwargs["max_string_token_length"]
-    )
-
-
-def test_tool_agent_initialization():
-    """Test tool agent initialization with valid parameters."""
-    agent = ToolAgent(
-        model_name="test-model", temperature=0.7, max_tokens=1000
-    )
-    assert agent.model_name == "test-model"
-    assert agent.temperature == 0.7
-    assert agent.max_tokens == 1000
-    assert agent.retry_attempts == 3
-    assert agent.retry_interval == 1.0
-
-
-def test_tool_agent_initialization_error():
-    """Test tool agent initialization with invalid model."""
-    with pytest.raises(ToolExecutionError) as exc_info:
-        ToolAgent(model_name="invalid-model")
-    assert "model_initialization" in str(exc_info.value)
-
-
-def test_tool_validation():
-    """Test tool parameter validation."""
-    tools_list = [
-        {
-            "name": "test_tool",
-            "parameters": [
-                {"name": "required_param", "required": True},
-                {"name": "optional_param", "required": False},
-            ],
-        }
-    ]
-
-    agent = ToolAgent(tools_list_dictionary=tools_list)
-
-    # Test missing required parameter
-    with pytest.raises(ToolParameterError) as exc_info:
-        agent._validate_tool("test_tool", {})
-    assert "Missing required parameters" in str(exc_info.value)
-
-    # Test valid parameters
-    agent._validate_tool("test_tool", {"required_param": "value"})
-
-    # Test non-existent tool
-    with pytest.raises(ToolNotFoundError) as exc_info:
-        agent._validate_tool("non_existent_tool", {})
-    assert "Tool 'non_existent_tool' not found" in str(exc_info.value)
-
-
-def test_retry_mechanism():
-    """Test retry mechanism for failed operations."""
-    mock_llm = Mock()
-    mock_llm.generate.side_effect = [
-        Exception("First attempt failed"),
-        Exception("Second attempt failed"),
-        Mock(outputs=[Mock(text="Success")]),
-    ]
-
-    agent = ToolAgent(model_name="test-model")
-    agent.llm = mock_llm
-
-    # Test successful retry
-    result = agent.run("test task")
-    assert result == "Success"
-    assert mock_llm.generate.call_count == 3
-
-    # Test all retries failing
-    mock_llm.generate.side_effect = Exception("All attempts failed")
-    with pytest.raises(ToolExecutionError) as exc_info:
-        agent.run("test task")
-    assert "All attempts failed" in str(exc_info.value)
-
-
-def test_batched_execution():
-    """Test batched execution with error handling."""
-    mock_llm = Mock()
-    mock_llm.generate.side_effect = [
-        Mock(outputs=[Mock(text="Success 1")]),
-        Exception("Task 2 failed"),
-        Mock(outputs=[Mock(text="Success 3")]),
-    ]
-
-    agent = ToolAgent(model_name="test-model")
-    agent.llm = mock_llm
-
-    tasks = ["Task 1", "Task 2", "Task 3"]
-    results = agent.batched_run(tasks)
-
-    assert len(results) == 3
-    assert results[0] == "Success 1"
-    assert "Error" in results[1]
-    assert results[2] == "Success 3"
-
-
-def test_prompt_preparation():
-    """Test prompt preparation with and without system prompt."""
-    # Test without system prompt
-    agent = ToolAgent()
-    prompt = agent._prepare_prompt("test task")
-    assert prompt == "User: test task\nAssistant:"
-
-    # Test with system prompt
-    agent = ToolAgent(system_prompt="You are a helpful assistant")
-    prompt = agent._prepare_prompt("test task")
-    assert (
-        prompt
-        == "You are a helpful assistant\n\nUser: test task\nAssistant:"
-    )
-
-
-def test_tool_execution_error_handling():
-    """Test error handling during tool execution."""
-    agent = ToolAgent(model_name="test-model")
-    agent.llm = None  # Simulate uninitialized LLM
-
-    with pytest.raises(ToolExecutionError) as exc_info:
-        agent.run("test task")
-    assert "LLM not initialized" in str(exc_info.value)
-
-    # Test with invalid parameters
-    with pytest.raises(ToolExecutionError) as exc_info:
-        agent.run("test task", invalid_param="value")
-    assert "Error running task" in str(exc_info.value)
diff --git a/tests/agent/benchmark_agent/test_agent_benchmark_init.py b/tests/agent/benchmark_agent/test_agent_benchmark_init.py
deleted file mode 100644
index 5f852576..00000000
--- a/tests/agent/benchmark_agent/test_agent_benchmark_init.py
+++ /dev/null
@@ -1,171 +0,0 @@
-from time import perf_counter_ns
-import psutil
-import os
-from rich.panel import Panel
-from rich.console import Console
-from rich.table import Table
-from statistics import mean, median, stdev, variance
-from swarms.structs.agent import Agent
-from swarms.prompts.finance_agent_sys_prompt import (
-    FINANCIAL_AGENT_SYS_PROMPT,
-)
-
-
-def get_memory_stats(memory_readings):
-    """Calculate memory statistics"""
-    return {
-        "peak": max(memory_readings),
-        "min": min(memory_readings),
-        "mean": mean(memory_readings),
-        "median": median(memory_readings),
-        "stdev": (
-            stdev(memory_readings) if len(memory_readings) > 1 else 0
-        ),
-        "variance": (
-            variance(memory_readings)
-            if len(memory_readings) > 1
-            else 0
-        ),
-    }
-
-
-def get_time_stats(times):
-    """Calculate time statistics"""
-    return {
-        "total": sum(times),
-        "mean": mean(times),
-        "median": median(times),
-        "min": min(times),
-        "max": max(times),
-        "stdev": stdev(times) if len(times) > 1 else 0,
-        "variance": variance(times) if len(times) > 1 else 0,
-    }
-
-
-def benchmark_multiple_agents(num_agents=100):
-    console = Console()
-    init_times = []
-    memory_readings = []
-    process = psutil.Process(os.getpid())
-
-    # Create benchmark tables
-    time_table = Table(title="Time Statistics")
-    time_table.add_column("Metric", style="cyan")
-    time_table.add_column("Value", style="green")
-
-    memory_table = Table(title="Memory Statistics")
-    memory_table.add_column("Metric", style="cyan")
-    memory_table.add_column("Value", style="green")
-
-    initial_memory = process.memory_info().rss / 1024
-    start_total_time = perf_counter_ns()
-
-    # Initialize agents and measure performance
-    for i in range(num_agents):
-        start_time = perf_counter_ns()
-
-        Agent(
-            agent_name=f"Financial-Analysis-Agent-{i}",
-            agent_description="Personal finance advisor agent",
-            system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
-            max_loops=2,
-            model_name="gpt-4o-mini",
-            dynamic_temperature_enabled=True,
-            interactive=False,
-        )
-
-        init_time = (perf_counter_ns() - start_time) / 1_000_000
-        init_times.append(init_time)
-
-        current_memory = process.memory_info().rss / 1024
-        memory_readings.append(current_memory - initial_memory)
-
-        if (i + 1) % 10 == 0:
-            console.print(
-                f"Created {i + 1} agents...", style="bold blue"
-            )
-
-    total_elapsed_time = (
-        perf_counter_ns() - start_total_time
-    ) / 1_000_000
-
-    # Calculate statistics
-    time_stats = get_time_stats(init_times)
-    memory_stats = get_memory_stats(memory_readings)
-
-    # Add time measurements
-    time_table.add_row(
-        "Total Wall Time", f"{total_elapsed_time:.2f} ms"
-    )
-    time_table.add_row(
-        "Total Init Time", f"{time_stats['total']:.2f} ms"
-    )
-    time_table.add_row(
-        "Average Init Time", f"{time_stats['mean']:.2f} ms"
-    )
-    time_table.add_row(
-        "Median Init Time", f"{time_stats['median']:.2f} ms"
-    )
-    time_table.add_row("Fastest Init", f"{time_stats['min']:.2f} ms")
-    time_table.add_row("Slowest Init", f"{time_stats['max']:.2f} ms")
-    time_table.add_row(
-        "Std Deviation", f"{time_stats['stdev']:.2f} ms"
-    )
-    time_table.add_row(
-        "Variance", f"{time_stats['variance']:.4f} ms²"
-    )
-    time_table.add_row(
-        "Throughput",
-        f"{(num_agents/total_elapsed_time) * 1000:.2f} agents/second",
-    )
-    time_table.add_row(
-        "Agents per Minute",
-        f"{(num_agents/total_elapsed_time) * 60000:.0f} agents/minute",
-    )
-
-    # Add memory measurements
-    memory_table.add_row(
-        "Peak Memory Usage", f"{memory_stats['peak']:.2f} KB"
-    )
-    memory_table.add_row(
-        "Minimum Memory Usage", f"{memory_stats['min']:.2f} KB"
-    )
-    memory_table.add_row(
-        "Average Memory Usage", f"{memory_stats['mean']:.2f} KB"
-    )
-    memory_table.add_row(
-        "Median Memory Usage", f"{memory_stats['median']:.2f} KB"
-    )
-    memory_table.add_row(
-        "Memory Std Deviation", f"{memory_stats['stdev']:.2f} KB"
-    )
-    memory_table.add_row(
-        "Memory Variance", f"{memory_stats['variance']:.2f} KB²"
-    )
-    memory_table.add_row(
-        "Avg Memory Per Agent",
-        f"{memory_stats['mean']/num_agents:.2f} KB",
-    )
-
-    # Create and display panels
-    time_panel = Panel(
-        time_table,
-        title="Time Benchmark Results",
-        border_style="blue",
-        padding=(1, 2),
-    )
-
-    memory_panel = Panel(
-        memory_table,
-        title="Memory Benchmark Results",
-        border_style="green",
-        padding=(1, 2),
-    )
-
-    console.print(time_panel)
-    console.print("\n")
-    console.print(memory_panel)
-
-
-if __name__ == "__main__":
-    benchmark_multiple_agents(1000)
diff --git a/tests/agent/benchmark_agent/test_agent_exec_benchmark.py b/tests/agent/benchmark_agent/test_agent_exec_benchmark.py
deleted file mode 100644
index 11872304..00000000
--- a/tests/agent/benchmark_agent/test_agent_exec_benchmark.py
+++ /dev/null
@@ -1,284 +0,0 @@
-import asyncio
-import concurrent.futures
-import json
-import os
-import psutil
-import datetime
-from pathlib import Path
-from typing import List, Dict, Any, Optional
-from swarms.structs.agent import Agent
-from loguru import logger
-
-
-class AgentBenchmark:
-    def __init__(
-        self,
-        num_iterations: int = 5,
-        output_dir: str = "benchmark_results",
-    ):
-        self.num_iterations = num_iterations
-        self.output_dir = Path(output_dir)
-        self.output_dir.mkdir(exist_ok=True)
-
-        # Use process pool for CPU-bound tasks
-        self.process_pool = concurrent.futures.ProcessPoolExecutor(
-            max_workers=min(os.cpu_count(), 4)
-        )
-
-        # Use thread pool for I/O-bound tasks
-        self.thread_pool = concurrent.futures.ThreadPoolExecutor(
-            max_workers=min(os.cpu_count() * 2, 8)
-        )
-
-        self.default_queries = [
-            "Conduct an analysis of the best real undervalued ETFs",
-            "What are the top performing tech stocks this quarter?",
-            "Analyze current market trends in renewable energy sector",
-            "Compare Bitcoin and Ethereum investment potential",
-            "Evaluate the risk factors in emerging markets",
-        ]
-
-        self.agent = self._initialize_agent()
-        self.process = psutil.Process()
-
-        # Cache for storing repeated query results
-        self._query_cache = {}
-
-    def _initialize_agent(self) -> Agent:
-        return Agent(
-            agent_name="Financial-Analysis-Agent",
-            agent_description="Personal finance advisor agent",
-            # system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
-            max_loops=1,
-            model_name="gpt-4o-mini",
-            dynamic_temperature_enabled=True,
-            interactive=False,
-        )
-
-    def _get_system_metrics(self) -> Dict[str, float]:
-        # Optimized system metrics collection
-        return {
-            "cpu_percent": self.process.cpu_percent(),
-            "memory_mb": self.process.memory_info().rss / 1024 / 1024,
-        }
-
-    def _calculate_statistics(
-        self, values: List[float]
-    ) -> Dict[str, float]:
-        if not values:
-            return {}
-
-        sorted_values = sorted(values)
-        n = len(sorted_values)
-        mean_val = sum(values) / n
-
-        stats = {
-            "mean": mean_val,
-            "median": sorted_values[n // 2],
-            "min": sorted_values[0],
-            "max": sorted_values[-1],
-        }
-
-        # Only calculate stdev if we have enough values
-        if n > 1:
-            stats["std_dev"] = (
-                sum((x - mean_val) ** 2 for x in values) / n
-            ) ** 0.5
-
-        return {k: round(v, 3) for k, v in stats.items()}
-
-    async def process_iteration(
-        self, query: str, iteration: int
-    ) -> Dict[str, Any]:
-        """Process a single iteration of a query"""
-        try:
-            # Check cache for repeated queries
-            cache_key = f"{query}_{iteration}"
-            if cache_key in self._query_cache:
-                return self._query_cache[cache_key]
-
-            iteration_start = datetime.datetime.now()
-            pre_metrics = self._get_system_metrics()
-
-            # Run the agent
-            try:
-                self.agent.run(query)
-                success = True
-            except Exception as e:
-                str(e)
-                success = False
-
-            execution_time = (
-                datetime.datetime.now() - iteration_start
-            ).total_seconds()
-            post_metrics = self._get_system_metrics()
-
-            result = {
-                "execution_time": execution_time,
-                "success": success,
-                "pre_metrics": pre_metrics,
-                "post_metrics": post_metrics,
-                "iteration_data": {
-                    "iteration": iteration + 1,
-                    "execution_time": round(execution_time, 3),
-                    "success": success,
-                    "system_metrics": {
-                        "pre": pre_metrics,
-                        "post": post_metrics,
-                    },
-                },
-            }
-
-            # Cache the result
-            self._query_cache[cache_key] = result
-            return result
-
-        except Exception as e:
-            logger.error(f"Error in iteration {iteration}: {e}")
-            raise
-
-    async def run_benchmark(
-        self, queries: Optional[List[str]] = None
-    ) -> Dict[str, Any]:
-        """Run the benchmark asynchronously"""
-        queries = queries or self.default_queries
-        benchmark_data = {
-            "metadata": {
-                "timestamp": datetime.datetime.now().isoformat(),
-                "num_iterations": self.num_iterations,
-                "agent_config": {
-                    "model_name": self.agent.model_name,
-                    "max_loops": self.agent.max_loops,
-                },
-            },
-            "results": {},
-        }
-
-        async def process_query(query: str):
-            query_results = {
-                "execution_times": [],
-                "system_metrics": [],
-                "iterations": [],
-            }
-
-            # Process iterations concurrently
-            tasks = [
-                self.process_iteration(query, i)
-                for i in range(self.num_iterations)
-            ]
-            iteration_results = await asyncio.gather(*tasks)
-
-            for result in iteration_results:
-                query_results["execution_times"].append(
-                    result["execution_time"]
-                )
-                query_results["system_metrics"].append(
-                    result["post_metrics"]
-                )
-                query_results["iterations"].append(
-                    result["iteration_data"]
-                )
-
-            # Calculate statistics
-            query_results["statistics"] = {
-                "execution_time": self._calculate_statistics(
-                    query_results["execution_times"]
-                ),
-                "memory_usage": self._calculate_statistics(
-                    [
-                        m["memory_mb"]
-                        for m in query_results["system_metrics"]
-                    ]
-                ),
-                "cpu_usage": self._calculate_statistics(
-                    [
-                        m["cpu_percent"]
-                        for m in query_results["system_metrics"]
-                    ]
-                ),
-            }
-
-            return query, query_results
-
-        # Execute all queries concurrently
-        query_tasks = [process_query(query) for query in queries]
-        query_results = await asyncio.gather(*query_tasks)
-
-        for query, results in query_results:
-            benchmark_data["results"][query] = results
-
-        return benchmark_data
-
-    def save_results(self, benchmark_data: Dict[str, Any]) -> str:
-        """Save benchmark results efficiently"""
-        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        filename = (
-            self.output_dir / f"benchmark_results_{timestamp}.json"
-        )
-
-        # Write results in a single operation
-        with open(filename, "w") as f:
-            json.dump(benchmark_data, f, indent=2)
-
-        logger.info(f"Benchmark results saved to: {filename}")
-        return str(filename)
-
-    def print_summary(self, results: Dict[str, Any]):
-        """Print a summary of the benchmark results"""
-        print("\n=== Benchmark Summary ===")
-        for query, data in results["results"].items():
-            print(f"\nQuery: {query[:50]}...")
-            stats = data["statistics"]["execution_time"]
-            print(f"Average time: {stats['mean']:.2f}s")
-            print(
-                f"Memory usage (avg): {data['statistics']['memory_usage']['mean']:.1f}MB"
-            )
-            print(
-                f"CPU usage (avg): {data['statistics']['cpu_usage']['mean']:.1f}%"
-            )
-
-    async def run_with_timeout(
-        self, timeout: int = 300
-    ) -> Dict[str, Any]:
-        """Run benchmark with timeout"""
-        try:
-            return await asyncio.wait_for(
-                self.run_benchmark(), timeout
-            )
-        except asyncio.TimeoutError:
-            logger.error(
-                f"Benchmark timed out after {timeout} seconds"
-            )
-            raise
-
-    def cleanup(self):
-        """Cleanup resources"""
-        self.process_pool.shutdown()
-        self.thread_pool.shutdown()
-        self._query_cache.clear()
-
-
-async def main():
-    try:
-        # Create and run benchmark
-        benchmark = AgentBenchmark(num_iterations=1)
-
-        # Run benchmark with timeout
-        results = await benchmark.run_with_timeout(timeout=300)
-
-        # Save results
-        benchmark.save_results(results)
-
-        # Print summary
-        benchmark.print_summary(results)
-
-    except Exception as e:
-        logger.error(f"Benchmark failed: {e}")
-    finally:
-        # Cleanup resources
-        benchmark.cleanup()
-
-
-if __name__ == "__main__":
-    # Run the async main function
-    asyncio.run(main())
diff --git a/tests/agent/benchmark_agent/test_auto_test_eval.py b/tests/agent/benchmark_agent/test_auto_test_eval.py
deleted file mode 100644
index 0b4d799a..00000000
--- a/tests/agent/benchmark_agent/test_auto_test_eval.py
+++ /dev/null
@@ -1,318 +0,0 @@
-import json
-import os
-import platform
-import sys
-import traceback
-from dataclasses import dataclass
-from datetime import datetime
-from typing import Any, Dict, List, Optional
-
-import psutil
-import requests
-from loguru import logger
-from swarm_models import OpenAIChat
-
-from swarms.structs.agent import Agent
-
-
-@dataclass
-class SwarmSystemInfo:
-    """System information for Swarms issue reports."""
-
-    os_name: str
-    os_version: str
-    python_version: str
-    cpu_usage: float
-    memory_usage: float
-    disk_usage: float
-    swarms_version: str  # Added Swarms version tracking
-    cuda_available: bool  # Added CUDA availability check
-    gpu_info: Optional[str]  # Added GPU information
-
-
-class SwarmsIssueReporter:
-    """
-    Production-grade GitHub issue reporter specifically designed for the Swarms library.
-    Automatically creates detailed issues for the https://github.com/kyegomez/swarms repository.
-
-    Features:
-    - Swarms-specific error categorization
-    - Automatic version and dependency tracking
-    - CUDA and GPU information collection
-    - Integration with Swarms logging system
-    - Detailed environment information
-    """
-
-    REPO_OWNER = "kyegomez"
-    REPO_NAME = "swarms"
-    ISSUE_CATEGORIES = {
-        "agent": ["agent", "automation"],
-        "memory": ["memory", "storage"],
-        "tool": ["tools", "integration"],
-        "llm": ["llm", "model"],
-        "performance": ["performance", "optimization"],
-        "compatibility": ["compatibility", "environment"],
-    }
-
-    def __init__(
-        self,
-        github_token: str,
-        rate_limit: int = 10,
-        rate_period: int = 3600,
-        log_file: str = "swarms_issues.log",
-        enable_duplicate_check: bool = True,
-    ):
-        """
-        Initialize the Swarms Issue Reporter.
-
-        Args:
-            github_token (str): GitHub personal access token
-            rate_limit (int): Maximum number of issues to create per rate_period
-            rate_period (int): Time period for rate limiting in seconds
-            log_file (str): Path to log file
-            enable_duplicate_check (bool): Whether to check for duplicate issues
-        """
-        self.github_token = github_token
-        self.rate_limit = rate_limit
-        self.rate_period = rate_period
-        self.enable_duplicate_check = enable_duplicate_check
-        self.github_token = os.getenv("GITHUB_API_KEY")
-
-        # Initialize logging
-        log_path = os.path.join(os.getcwd(), "logs", log_file)
-        os.makedirs(os.path.dirname(log_path), exist_ok=True)
-
-        # Issue tracking
-        self.issues_created = []
-        self.last_issue_time = datetime.now()
-
-    def _get_swarms_version(self) -> str:
-        """Get the installed version of Swarms."""
-        try:
-            import swarms
-
-            return swarms.__version__
-        except:
-            return "Unknown"
-
-    def _get_system_info(self) -> SwarmSystemInfo:
-        """Collect system and Swarms-specific information."""
-
-        return SwarmSystemInfo(
-            os_name=platform.system(),
-            os_version=platform.version(),
-            python_version=sys.version,
-            cpu_usage=psutil.cpu_percent(),
-            memory_usage=psutil.virtual_memory().percent,
-            disk_usage=psutil.disk_usage("/").percent,
-            swarms_version=self._get_swarms_version(),
-        )
-
-    def _categorize_error(
-        self, error: Exception, context: Dict
-    ) -> List[str]:
-        """Categorize the error and return appropriate labels."""
-        error_str = str(error).lower()
-        type(error).__name__
-
-        labels = ["bug", "automated"]
-
-        # Check error message and context for category keywords
-        for (
-            category,
-            category_labels,
-        ) in self.ISSUE_CATEGORIES.items():
-            if any(
-                keyword in error_str for keyword in category_labels
-            ):
-                labels.extend(category_labels)
-                break
-
-        # Add severity label based on error type
-        if issubclass(type(error), (SystemError, MemoryError)):
-            labels.append("severity:critical")
-        elif issubclass(type(error), (ValueError, TypeError)):
-            labels.append("severity:medium")
-        else:
-            labels.append("severity:low")
-
-        return list(set(labels))  # Remove duplicates
-
-    def _format_swarms_issue_body(
-        self,
-        error: Exception,
-        system_info: SwarmSystemInfo,
-        context: Dict,
-    ) -> str:
-        """Format the issue body with Swarms-specific information."""
-        return f"""
-        ## Swarms Error Report
-        - **Error Type**: {type(error).__name__}
-        - **Error Message**: {str(error)}
-        - **Swarms Version**: {system_info.swarms_version}
-
-        ## Environment Information
-        - **OS**: {system_info.os_name} {system_info.os_version}
-        - **Python Version**: {system_info.python_version}
-        - **CUDA Available**: {system_info.cuda_available}
-        - **GPU**: {system_info.gpu_info or "N/A"}
-        - **CPU Usage**: {system_info.cpu_usage}%
-        - **Memory Usage**: {system_info.memory_usage}%
-        - **Disk Usage**: {system_info.disk_usage}%
-
-        ## Stack Trace
-        {traceback.format_exc()}
-
-        ## Context
-        {json.dumps(context, indent=2)}
-
-        ## Dependencies
-        {self._get_dependencies_info()}
-
-        ## Time of Occurrence
-        {datetime.now().isoformat()}
-
-        ---
-        *This issue was automatically generated by SwarmsIssueReporter*
-        """
-
-    def _get_dependencies_info(self) -> str:
-        """Get information about installed dependencies."""
-        try:
-            import pkg_resources
-
-            deps = []
-            for dist in pkg_resources.working_set:
-                deps.append(f"- {dist.key} {dist.version}")
-            return "\n".join(deps)
-        except:
-            return "Unable to fetch dependency information"
-
-    # First, add this method to your SwarmsIssueReporter class
-    def _check_rate_limit(self) -> bool:
-        """Check if we're within rate limits."""
-        now = datetime.now()
-        time_diff = (now - self.last_issue_time).total_seconds()
-
-        if (
-            len(self.issues_created) >= self.rate_limit
-            and time_diff < self.rate_period
-        ):
-            logger.warning("Rate limit exceeded for issue creation")
-            return False
-
-        # Clean up old issues from tracking
-        self.issues_created = [
-            time
-            for time in self.issues_created
-            if (now - time).total_seconds() < self.rate_period
-        ]
-
-        return True
-
-    def report_swarms_issue(
-        self,
-        error: Exception,
-        agent: Optional[Agent] = None,
-        context: Dict[str, Any] = None,
-        priority: str = "normal",
-    ) -> Optional[int]:
-        """
-        Report a Swarms-specific issue to GitHub.
-
-        Args:
-            error (Exception): The exception to report
-            agent (Optional[Agent]): The Swarms agent instance that encountered the error
-            context (Dict[str, Any]): Additional context about the error
-            priority (str): Issue priority ("low", "normal", "high", "critical")
-
-        Returns:
-            Optional[int]: Issue number if created successfully
-        """
-        try:
-            if not self._check_rate_limit():
-                logger.warning(
-                    "Skipping issue creation due to rate limit"
-                )
-                return None
-
-            # Collect system information
-            system_info = self._get_system_info()
-
-            # Prepare context with agent information if available
-            full_context = context or {}
-            if agent:
-                full_context.update(
-                    {
-                        "agent_name": agent.agent_name,
-                        "agent_description": agent.agent_description,
-                        "max_loops": agent.max_loops,
-                        "context_length": agent.context_length,
-                    }
-                )
-
-            # Create issue title
-            title = f"[{type(error).__name__}] {str(error)[:100]}"
-            if agent:
-                title = f"[Agent: {agent.agent_name}] {title}"
-
-            # Get appropriate labels
-            labels = self._categorize_error(error, full_context)
-            labels.append(f"priority:{priority}")
-
-            # Create the issue
-            url = f"https://api.github.com/repos/{self.REPO_OWNER}/{self.REPO_NAME}/issues"
-            data = {
-                "title": title,
-                "body": self._format_swarms_issue_body(
-                    error, system_info, full_context
-                ),
-                "labels": labels,
-            }
-
-            response = requests.post(
-                url,
-                headers={
-                    "Authorization": f"token {self.github_token}"
-                },
-                json=data,
-            )
-            response.raise_for_status()
-
-            issue_number = response.json()["number"]
-            logger.info(
-                f"Successfully created Swarms issue #{issue_number}"
-            )
-
-            return issue_number
-
-        except Exception as e:
-            logger.error(f"Error creating Swarms issue: {str(e)}")
-            return None
-
-
-# Setup the reporter with your GitHub token
-reporter = SwarmsIssueReporter(
-    github_token=os.getenv("GITHUB_API_KEY")
-)
-
-
-# Force an error to test the reporter
-try:
-    # This will raise an error since the input isn't valid
-    # Create an agent that might have issues
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(agent_name="Test-Agent", max_loops=1)
-
-    result = agent.run(None)
-
-    raise ValueError("test")
-except Exception as e:
-    # Report the issue
-    issue_number = reporter.report_swarms_issue(
-        error=e,
-        agent=agent,
-        context={"task": "test_run"},
-        priority="high",
-    )
-    print(f"Created issue number: {issue_number}")
diff --git a/tests/agent/benchmark_agent/test_github_summarizer_agent.py b/tests/agent/benchmark_agent/test_github_summarizer_agent.py
deleted file mode 100644
index 6c852b28..00000000
--- a/tests/agent/benchmark_agent/test_github_summarizer_agent.py
+++ /dev/null
@@ -1,180 +0,0 @@
-import requests
-import datetime
-from typing import List, Dict, Tuple
-from loguru import logger
-from swarms import Agent
-from swarm_models import OpenAIChat
-
-# GitHub API Configurations
-GITHUB_REPO = "kyegomez/swarms"  # Swarms GitHub repository
-GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_REPO}/commits"
-
-
-# Step 1: Fetch the latest commits from GitHub
-def fetch_latest_commits(
-    repo_url: str, limit: int = 5
-) -> List[Dict[str, str]]:
-    """
-    Fetch the latest commits from a public GitHub repository.
-    """
-    logger.info(
-        f"Fetching the latest {limit} commits from {repo_url}"
-    )
-    try:
-        params = {"per_page": limit}
-        response = requests.get(repo_url, params=params)
-        response.raise_for_status()
-
-        commits = response.json()
-        commit_data = []
-
-        for commit in commits:
-            commit_data.append(
-                {
-                    "sha": commit["sha"][:7],  # Short commit hash
-                    "author": commit["commit"]["author"]["name"],
-                    "message": commit["commit"]["message"],
-                    "date": commit["commit"]["author"]["date"],
-                }
-            )
-
-        logger.success("Successfully fetched commit data")
-        return commit_data
-
-    except Exception as e:
-        logger.error(f"Error fetching commits: {e}")
-        raise
-
-
-# Step 2: Format commits and fetch current time
-def format_commits_with_time(
-    commits: List[Dict[str, str]],
-) -> Tuple[str, str]:
-    """
-    Format commit data into a readable string and return current time.
-    """
-    current_time = datetime.datetime.now().strftime(
-        "%Y-%m-%d %H:%M:%S"
-    )
-    logger.info(f"Formatting commits at {current_time}")
-
-    commit_summary = "\n".join(
-        [
-            f"- `{commit['sha']}` by {commit['author']} on {commit['date']}: {commit['message']}"
-            for commit in commits
-        ]
-    )
-
-    logger.success("Commits formatted successfully")
-    return current_time, commit_summary
-
-
-# Step 3: Build a dynamic system prompt
-def build_custom_system_prompt(
-    current_time: str, commit_summary: str
-) -> str:
-    """
-    Build a dynamic system prompt with the current time and commit summary.
-    """
-    logger.info("Building the custom system prompt for the agent")
-    prompt = f"""
-You are a software analyst tasked with summarizing the latest commits from the Swarms GitHub repository.
-
-The current time is **{current_time}**.
-
-Here are the latest commits:
-{commit_summary}
-
-**Your task**: 
-1. Summarize the changes into a clear and concise table in **markdown format**.
-2. Highlight the key improvements and fixes.
-3. End your output with the token `<DONE>`.
-
-Make sure the table includes the following columns: Commit SHA, Author, Date, and Commit Message.
-"""
-    logger.success("System prompt created successfully")
-    return prompt
-
-
-# Step 4: Initialize the Agent
-def initialize_agent() -> Agent:
-    """
-    Initialize the Swarms agent with OpenAI model.
-    """
-    logger.info("Initializing the agent with GPT-4o")
-    model = OpenAIChat(model_name="gpt-4.1")
-
-    agent = Agent(
-        agent_name="Commit-Summarization-Agent",
-        agent_description="Fetch and summarize GitHub commits for Swarms repository.",
-        system_prompt="",  # Will set dynamically
-        max_loops=1,
-        llm=model,
-        dynamic_temperature_enabled=True,
-        user_name="Kye",
-        retry_attempts=3,
-        context_length=8192,
-        return_step_meta=False,
-        output_type="str",
-        auto_generate_prompt=False,
-        max_tokens=4000,
-        stopping_token="<DONE>",
-        interactive=False,
-    )
-    logger.success("Agent initialized successfully")
-    return agent
-
-
-# Step 5: Run the Agent with Data
-def summarize_commits_with_agent(agent: Agent, prompt: str) -> str:
-    """
-    Pass the system prompt to the agent and fetch the result.
-    """
-    logger.info("Sending data to the agent for summarization")
-    try:
-        result = agent.run(
-            f"{prompt}",
-            all_cores=True,
-        )
-        logger.success("Agent completed the summarization task")
-        return result
-    except Exception as e:
-        logger.error(f"Agent encountered an error: {e}")
-        raise
-
-
-# Main Execution
-if __name__ == "__main__":
-    try:
-        logger.info("Starting commit summarization process")
-
-        # Fetch latest commits
-        latest_commits = fetch_latest_commits(GITHUB_API_URL, limit=5)
-
-        # Format commits and get current time
-        current_time, commit_summary = format_commits_with_time(
-            latest_commits
-        )
-
-        # Build the custom system prompt
-        custom_system_prompt = build_custom_system_prompt(
-            current_time, commit_summary
-        )
-
-        # Initialize agent
-        agent = initialize_agent()
-
-        # Set the dynamic system prompt
-        agent.system_prompt = custom_system_prompt
-
-        # Run the agent and summarize commits
-        result = summarize_commits_with_agent(
-            agent, custom_system_prompt
-        )
-
-        # Print the result
-        print("### Commit Summary in Markdown:")
-        print(result)
-
-    except Exception as e:
-        logger.critical(f"Process failed: {e}")
diff --git a/tests/agent/benchmark_agent/test_profiling_agent.py b/tests/agent/benchmark_agent/test_profiling_agent.py
deleted file mode 100644
index 4b7dbd70..00000000
--- a/tests/agent/benchmark_agent/test_profiling_agent.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os
-import uuid
-from swarms import Agent
-from swarm_models import OpenAIChat
-from swarms.prompts.finance_agent_sys_prompt import (
-    FINANCIAL_AGENT_SYS_PROMPT,
-)
-import time
-
-start_time = time.time()
-
-
-# Get the OpenAI API key from the environment variable
-api_key = os.getenv("OPENAI_API_KEY")
-
-# Create an instance of the OpenAIChat class
-model = OpenAIChat(
-    api_key=api_key, model_name="gpt-4o-mini", temperature=0.1
-)
-
-
-agent = Agent(
-    agent_name=f"{uuid.uuid4().hex}",
-    system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
-    llm=model,
-    max_loops=1,
-    autosave=True,
-    dashboard=False,
-    verbose=True,
-    dynamic_temperature_enabled=True,
-    saved_state_path=f"{uuid.uuid4().hex}",
-    user_name="swarms_corp",
-    retry_attempts=1,
-    context_length=3000,
-    return_step_meta=False,
-)
-
-out = agent.run(
-    "How can I establish a ROTH IRA to buy stocks and get a tax break? What are the criteria"
-)
-print(out)
-
-end_time = time.time()
-
-print(f"Execution time: {end_time - start_time} seconds")
-# Execution time: 9.922541856765747 seconds for the whole script
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 19782870..f097d0fb 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -2,4 +2,14 @@ swarms
 pytest
 matplotlib
 loguru
-unittest
\ No newline at end of file
+psutil
+pyyaml
+python-dotenv
+rich
+pydantic
+numpy
+pandas
+openpyxl
+seaborn
+requests
+swarms-memory
\ No newline at end of file
diff --git a/tests/structs/test_agent.py b/tests/structs/test_agent.py
index 1c4c7971..417c77c7 100644
--- a/tests/structs/test_agent.py
+++ b/tests/structs/test_agent.py
@@ -1,1334 +1,2479 @@
+import asyncio
 import json
 import os
-from unittest import mock
+import tempfile
+import time
+import unittest
+from statistics import mean, median, stdev, variance
 from unittest.mock import MagicMock, patch
 
+import psutil
 import pytest
+import yaml
 from dotenv import load_dotenv
+from rich.console import Console
+from rich.table import Table
 
-from swarm_models import OpenAIChat
-from swarms.structs.agent import Agent, stop_when_repeats
-from swarms.utils.loguru_logger import logger
+from swarms import (
+    Agent,
+    create_agents_from_yaml,
+)
 
+# Load environment variables
 load_dotenv()
 
+# Global test configuration
 openai_api_key = os.getenv("OPENAI_API_KEY")
 
 
-# Mocks and Fixtures
-@pytest.fixture
-def mocked_llm():
-    return OpenAIChat(
-        openai_api_key=openai_api_key,
-    )
+# ============================================================================
+# FIXTURES AND UTILITIES
+# ============================================================================
 
 
 @pytest.fixture
 def basic_flow(mocked_llm):
-    return Agent(llm=mocked_llm, max_loops=5)
+    """Basic agent flow for testing"""
+    return Agent(llm=mocked_llm, max_loops=1)
 
 
 @pytest.fixture
 def flow_with_condition(mocked_llm):
+    """Agent flow with stopping condition"""
+    from swarms.structs.agent import stop_when_repeats
+
     return Agent(
         llm=mocked_llm,
-        max_loops=5,
+        max_loops=1,
         stopping_condition=stop_when_repeats,
     )
 
 
-# Basic Tests
-def test_stop_when_repeats():
-    assert stop_when_repeats("Please Stop now")
-    assert not stop_when_repeats("Continue the process")
-
-
-def test_flow_initialization(basic_flow):
-    assert basic_flow.max_loops == 5
-    assert basic_flow.stopping_condition is None
-    assert basic_flow.loop_interval == 1
-    assert basic_flow.retry_attempts == 3
-    assert basic_flow.retry_interval == 1
-    assert basic_flow.feedback == []
-    assert basic_flow.memory == []
-    assert basic_flow.task is None
-    assert basic_flow.stopping_token == "<DONE>"
-    assert not basic_flow.interactive
-
-
-def test_provide_feedback(basic_flow):
-    feedback = "Test feedback"
-    basic_flow.provide_feedback(feedback)
-    assert feedback in basic_flow.feedback
-
-
-@patch("time.sleep", return_value=None)  # to speed up tests
-def test_run_without_stopping_condition(mocked_sleep, basic_flow):
-    response = basic_flow.run("Test task")
-    assert (
-        response == "Test task"
-    )  # since our mocked llm doesn't modify the response
-
-
-@patch("time.sleep", return_value=None)  # to speed up tests
-def test_run_with_stopping_condition(
-    mocked_sleep, flow_with_condition
-):
-    response = flow_with_condition.run("Stop")
-    assert response == "Stop"
-
-
-@patch("time.sleep", return_value=None)  # to speed up tests
-def test_run_with_exception(mocked_sleep, basic_flow):
-    basic_flow.llm.side_effect = Exception("Test Exception")
-    with pytest.raises(Exception, match="Test Exception"):
-        basic_flow.run("Test task")
-
-
-def test_bulk_run(basic_flow):
-    inputs = [{"task": "Test1"}, {"task": "Test2"}]
-    responses = basic_flow.bulk_run(inputs)
-    assert responses == ["Test1", "Test2"]
-
-
-# Tests involving file IO
-def test_save_and_load(basic_flow, tmp_path):
-    file_path = tmp_path / "memory.json"
-    basic_flow.memory.append(["Test1", "Test2"])
-    basic_flow.save(file_path)
-
-    new_flow = Agent(llm=mocked_llm, max_loops=5)
-    new_flow.load(file_path)
-    assert new_flow.memory == [["Test1", "Test2"]]
-
-
-# Environment variable mock test
-def test_env_variable_handling(monkeypatch):
-    monkeypatch.setenv("API_KEY", "test_key")
-    assert os.getenv("API_KEY") == "test_key"
-
-
-# TODO: Add more tests, especially edge cases and exception cases. Implement parametrized tests for varied inputs.
-
-
-# Test initializing the agent with different stopping conditions
-def test_flow_with_custom_stopping_condition(mocked_llm):
-    def stopping_condition(x):
-        return "terminate" in x.lower()
-
-    agent = Agent(
-        llm=mocked_llm,
-        max_loops=5,
-        stopping_condition=stopping_condition,
-    )
-    assert agent.stopping_condition("Please terminate now")
-    assert not agent.stopping_condition("Continue the process")
-
-
-# Test calling the agent directly
-def test_flow_call(basic_flow):
-    response = basic_flow("Test call")
-    assert response == "Test call"
-
-
-# Test formatting the prompt
-def test_format_prompt(basic_flow):
-    formatted_prompt = basic_flow.format_prompt(
-        "Hello {name}", name="John"
-    )
-    assert formatted_prompt == "Hello John"
-
-
-# Test with max loops
-@patch("time.sleep", return_value=None)
-def test_max_loops(mocked_sleep, basic_flow):
-    basic_flow.max_loops = 3
-    response = basic_flow.run("Looping")
-    assert response == "Looping"
-
-
-# Test stopping token
-@patch("time.sleep", return_value=None)
-def test_stopping_token(mocked_sleep, basic_flow):
-    basic_flow.stopping_token = "Terminate"
-    response = basic_flow.run("Loop until Terminate")
-    assert response == "Loop until Terminate"
-
-
-# Test interactive mode
-def test_interactive(basic_flow):
-    basic_flow.interactive = True
-    assert basic_flow.interactive
-
-
-# Test bulk run with varied inputs
-def test_bulk_run_varied_inputs(basic_flow):
-    inputs = [
-        {"task": "Test1"},
-        {"task": "Test2"},
-        {"task": "Stop now"},
-    ]
-    responses = basic_flow.bulk_run(inputs)
-    assert responses == ["Test1", "Test2", "Stop now"]
-
-
-# Test loading non-existent file
-def test_load_non_existent_file(basic_flow, tmp_path):
-    file_path = tmp_path / "non_existent.json"
-    with pytest.raises(FileNotFoundError):
-        basic_flow.load(file_path)
-
-
-# Test saving with different memory data
-def test_save_different_memory(basic_flow, tmp_path):
-    file_path = tmp_path / "memory.json"
-    basic_flow.memory.append(["Task1", "Task2", "Task3"])
-    basic_flow.save(file_path)
-    with open(file_path) as f:
-        data = json.load(f)
-    assert data == [["Task1", "Task2", "Task3"]]
-
-
-# Test the stopping condition check
-def test_check_stopping_condition(flow_with_condition):
-    assert flow_with_condition._check_stopping_condition(
-        "Stop this process"
-    )
-    assert not flow_with_condition._check_stopping_condition(
-        "Continue the task"
-    )
-
-
-# Test without providing max loops (default value should be 5)
-def test_default_max_loops(mocked_llm):
-    agent = Agent(llm=mocked_llm)
-    assert agent.max_loops == 5
-
-
-# Test creating agent from llm and template
-def test_from_llm_and_template(mocked_llm):
-    agent = Agent.from_llm_and_template(mocked_llm, "Test template")
-    assert isinstance(agent, Agent)
-
+@pytest.fixture
+def mock_agents():
+    """Mock agents for testing"""
 
-# Mocking the OpenAIChat for testing
-@patch("swarms.models.OpenAIChat", autospec=True)
-def test_mocked_openai_chat(MockedOpenAIChat):
-    llm = MockedOpenAIChat(openai_api_key=openai_api_key)
-    llm.return_value = MagicMock()
-    agent = Agent(llm=llm, max_loops=5)
-    agent.run("Mocked run")
-    assert MockedOpenAIChat.called
+    class MockAgent:
+        def __init__(self, name):
+            self.name = name
+            self.agent_name = name
 
+        def run(self, task, img=None, *args, **kwargs):
+            return f"{self.name} processed {task}"
 
-# Test retry attempts
-@patch("time.sleep", return_value=None)
-def test_retry_attempts(mocked_sleep, basic_flow):
-    basic_flow.retry_attempts = 2
-    basic_flow.llm.side_effect = [
-        Exception("Test Exception"),
-        "Valid response",
+    return [
+        MockAgent(name="Agent1"),
+        MockAgent(name="Agent2"),
+        MockAgent(name="Agent3"),
     ]
-    response = basic_flow.run("Test retry")
-    assert response == "Valid response"
-
-
-# Test different loop intervals
-@patch("time.sleep", return_value=None)
-def test_different_loop_intervals(mocked_sleep, basic_flow):
-    basic_flow.loop_interval = 2
-    response = basic_flow.run("Test loop interval")
-    assert response == "Test loop interval"
-
-
-# Test different retry intervals
-@patch("time.sleep", return_value=None)
-def test_different_retry_intervals(mocked_sleep, basic_flow):
-    basic_flow.retry_interval = 2
-    response = basic_flow.run("Test retry interval")
-    assert response == "Test retry interval"
-
-
-# Test invoking the agent with additional kwargs
-@patch("time.sleep", return_value=None)
-def test_flow_call_with_kwargs(mocked_sleep, basic_flow):
-    response = basic_flow(
-        "Test call", param1="value1", param2="value2"
-    )
-    assert response == "Test call"
-
-
-# Test initializing the agent with all parameters
-def test_flow_initialization_all_params(mocked_llm):
-    agent = Agent(
-        llm=mocked_llm,
-        max_loops=10,
-        stopping_condition=stop_when_repeats,
-        loop_interval=2,
-        retry_attempts=4,
-        retry_interval=2,
-        interactive=True,
-        param1="value1",
-        param2="value2",
-    )
-    assert agent.max_loops == 10
-    assert agent.loop_interval == 2
-    assert agent.retry_attempts == 4
-    assert agent.retry_interval == 2
-    assert agent.interactive
-
-
-# Test the stopping token is in the response
-@patch("time.sleep", return_value=None)
-def test_stopping_token_in_response(mocked_sleep, basic_flow):
-    response = basic_flow.run("Test stopping token")
-    assert basic_flow.stopping_token in response
 
 
 @pytest.fixture
-def flow_instance():
-    # Create an instance of the Agent class with required parameters for testing
-    # You may need to adjust this based on your actual class initialization
-    llm = OpenAIChat(
-        openai_api_key=openai_api_key,
-    )
-    agent = Agent(
-        llm=llm,
-        max_loops=5,
-        interactive=False,
-        dashboard=False,
-        dynamic_temperature=False,
-    )
-    return agent
-
-
-def test_flow_run(flow_instance):
-    # Test the basic run method of the Agent class
-    response = flow_instance.run("Test task")
-    assert isinstance(response, str)
-    assert len(response) > 0
-
-
-def test_flow_interactive(flow_instance):
-    # Test the interactive mode of the Agent class
-    flow_instance.interactive = True
-    response = flow_instance.run("Test task")
-    assert isinstance(response, str)
-    assert len(response) > 0
-
-
-def test_flow_dashboard_mode(flow_instance):
-    # Test the dashboard mode of the Agent class
-    flow_instance.dashboard = True
-    response = flow_instance.run("Test task")
-    assert isinstance(response, str)
-    assert len(response) > 0
-
-
-def test_flow_autosave(flow_instance):
-    # Test the autosave functionality of the Agent class
-    flow_instance.autosave = True
-    response = flow_instance.run("Test task")
-    assert isinstance(response, str)
-    assert len(response) > 0
-    # Ensure that the state is saved (you may need to implement this logic)
-    assert flow_instance.saved_state_path is not None
-
-
-def test_flow_response_filtering(flow_instance):
-    # Test the response filtering functionality
-    flow_instance.add_response_filter("filter_this")
-    response = flow_instance.filtered_run(
-        "This message should filter_this"
-    )
-    assert "filter_this" not in response
-
-
-def test_flow_undo_last(flow_instance):
-    # Test the undo functionality
-    response1 = flow_instance.run("Task 1")
-    flow_instance.run("Task 2")
-    previous_state, message = flow_instance.undo_last()
-    assert response1 == previous_state
-    assert "Restored to" in message
-
-
-def test_flow_dynamic_temperature(flow_instance):
-    # Test dynamic temperature adjustment
-    flow_instance.dynamic_temperature = True
-    response = flow_instance.run("Test task")
-    assert isinstance(response, str)
-    assert len(response) > 0
-
-
-def test_flow_streamed_generation(flow_instance):
-    # Test streamed generation
-    response = flow_instance.streamed_generation("Generating...")
-    assert isinstance(response, str)
-    assert len(response) > 0
-
-
-def test_flow_step(flow_instance):
-    # Test the step method
-    response = flow_instance.step("Test step")
-    assert isinstance(response, str)
-    assert len(response) > 0
-
-
-def test_flow_graceful_shutdown(flow_instance):
-    # Test graceful shutdown
-    result = flow_instance.graceful_shutdown()
-    assert result is not None
-
-
-# Add more test cases as needed to cover various aspects of your Agent class
-
-
-def test_flow_max_loops(flow_instance):
-    # Test setting and getting the maximum number of loops
-    flow_instance.set_max_loops(10)
-    assert flow_instance.get_max_loops() == 10
-
-
-def test_flow_autosave_path(flow_instance):
-    # Test setting and getting the autosave path
-    flow_instance.set_autosave_path("text.txt")
-    assert flow_instance.get_autosave_path() == "txt.txt"
-
-
-def test_flow_response_length(flow_instance):
-    # Test checking the length of the response
-    response = flow_instance.run(
-        "Generate a 10,000 word long blog on mental clarity and the"
-        " benefits of meditation."
+def test_agent():
+    """Create a real agent for testing"""
+    with patch("swarms.structs.agent.LiteLLM") as mock_llm:
+        mock_llm.return_value.run.return_value = "Test response"
+        return Agent(
+            agent_name="test_agent",
+            agent_description="A test agent",
+            system_prompt="You are a test agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            verbose=False,
+            print_on=False,
+        )
+
+
+# ============================================================================
+# BASIC AGENT TESTS
+# ============================================================================
+
+
+class TestBasicAgent:
+    """Test basic agent functionality"""
+
+    def test_stop_when_repeats(self):
+        """Test stopping condition function"""
+        from swarms.structs.agent import stop_when_repeats
+
+        assert stop_when_repeats("Please Stop now")
+        assert not stop_when_repeats("Continue the process")
+
+    def test_flow_initialization(self, basic_flow):
+        """Test agent initialization"""
+        assert basic_flow.max_loops == 5
+        assert basic_flow.stopping_condition is None
+        assert basic_flow.loop_interval == 1
+        assert basic_flow.retry_attempts == 3
+        assert basic_flow.retry_interval == 1
+        assert basic_flow.feedback == []
+        assert basic_flow.memory == []
+        assert basic_flow.task is None
+        assert basic_flow.stopping_token == "<DONE>"
+        assert not basic_flow.interactive
+
+    def test_provide_feedback(self, basic_flow):
+        """Test feedback functionality"""
+        feedback = "Test feedback"
+        basic_flow.provide_feedback(feedback)
+        assert feedback in basic_flow.feedback
+
+    @patch("time.sleep", return_value=None)
+    def test_run_without_stopping_condition(
+        self, mocked_sleep, basic_flow
+    ):
+        """Test running without stopping condition"""
+        response = basic_flow.run("Test task")
+        assert response is not None
+
+    @patch("time.sleep", return_value=None)
+    def test_run_with_stopping_condition(
+        self, mocked_sleep, flow_with_condition
+    ):
+        """Test running with stopping condition"""
+        response = flow_with_condition.run("Stop")
+        assert response is not None
+
+    def test_bulk_run(self, basic_flow):
+        """Test bulk run functionality"""
+        inputs = [{"task": "Test1"}, {"task": "Test2"}]
+        responses = basic_flow.bulk_run(inputs)
+        assert responses is not None
+
+    def test_save_and_load(self, basic_flow, tmp_path):
+        """Test save and load functionality"""
+        file_path = tmp_path / "memory.json"
+        basic_flow.memory.append(["Test1", "Test2"])
+        basic_flow.save(file_path)
+
+        new_flow = Agent(llm=basic_flow.llm, max_loops=5)
+        new_flow.load(file_path)
+        assert new_flow.memory == [["Test1", "Test2"]]
+
+    def test_flow_call(self, basic_flow):
+        """Test calling agent directly"""
+        response = basic_flow("Test call")
+        assert response == "Test call"
+
+    def test_format_prompt(self, basic_flow):
+        """Test prompt formatting"""
+        formatted_prompt = basic_flow.format_prompt(
+            "Hello {name}", name="John"
+        )
+        assert formatted_prompt == "Hello John"
+
+
+# ============================================================================
+# AGENT FEATURES TESTS
+# ============================================================================
+
+
+class TestAgentFeatures:
+    """Test advanced agent features"""
+
+    def test_basic_agent_functionality(self):
+        """Test basic agent initialization and task execution"""
+        print("\nTesting basic agent functionality...")
+
+        agent = Agent(
+            agent_name="Test-Agent", model_name="gpt-4.1", max_loops=1
+        )
+
+        response = agent.run("What is 2+2?")
+        assert (
+            response is not None
+        ), "Agent response should not be None"
+
+        # Test agent properties
+        assert (
+            agent.agent_name == "Test-Agent"
+        ), "Agent name not set correctly"
+        assert agent.max_loops == 1, "Max loops not set correctly"
+        assert agent.llm is not None, "LLM not initialized"
+
+        print("✓ Basic agent functionality test passed")
+
+    def test_memory_management(self):
+        """Test agent memory management functionality"""
+        print("\nTesting memory management...")
+
+        agent = Agent(
+            agent_name="Memory-Test-Agent",
+            max_loops=1,
+            model_name="gpt-4.1",
+            context_length=8192,
+        )
+
+        # Test adding to memory
+        agent.add_memory("Test memory entry")
+        assert (
+            "Test memory entry"
+            in agent.short_memory.return_history_as_string()
+        )
+
+        # Test memory query
+        agent.memory_query("Test query")
+
+        # Test token counting
+        tokens = agent.check_available_tokens()
+        assert isinstance(
+            tokens, int
+        ), "Token count should be an integer"
+
+        print("✓ Memory management test passed")
+
+    def test_agent_output_formats(self):
+        """Test all available output formats"""
+        print("\nTesting all output formats...")
+
+        test_task = "Say hello!"
+
+        output_types = {
+            "str": str,
+            "string": str,
+            "list": str,  # JSON string containing list
+            "json": str,  # JSON string
+            "dict": dict,
+            "yaml": str,
+        }
+
+        for output_type, expected_type in output_types.items():
+            agent = Agent(
+                agent_name=f"{output_type.capitalize()}-Output-Agent",
+                model_name="gpt-4.1",
+                max_loops=1,
+                output_type=output_type,
+            )
+
+            response = agent.run(test_task)
+            assert (
+                response is not None
+            ), f"{output_type} output should not be None"
+
+            if output_type == "yaml":
+                # Verify YAML can be parsed
+                try:
+                    yaml.safe_load(response)
+                    print(f"✓ {output_type} output valid")
+                except yaml.YAMLError:
+                    assert (
+                        False
+                    ), f"Invalid YAML output for {output_type}"
+            elif output_type in ["json", "list"]:
+                # Verify JSON can be parsed
+                try:
+                    json.loads(response)
+                    print(f"✓ {output_type} output valid")
+                except json.JSONDecodeError:
+                    assert (
+                        False
+                    ), f"Invalid JSON output for {output_type}"
+
+        print("✓ Output formats test passed")
+
+    def test_agent_state_management(self):
+        """Test comprehensive state management functionality"""
+        print("\nTesting state management...")
+
+        # Create temporary directory for test files
+        with tempfile.TemporaryDirectory() as temp_dir:
+            state_path = os.path.join(temp_dir, "agent_state.json")
+
+            # Create agent with initial state
+            agent1 = Agent(
+                agent_name="State-Test-Agent",
+                model_name="gpt-4.1",
+                max_loops=1,
+                saved_state_path=state_path,
+            )
+
+            # Add some data to the agent
+            agent1.run("Remember this: Test message 1")
+            agent1.add_memory("Test message 2")
+
+            # Save state
+            agent1.save()
+            assert os.path.exists(
+                state_path
+            ), "State file not created"
+
+            # Create new agent and load state
+            agent2 = Agent(
+                agent_name="State-Test-Agent",
+                model_name="gpt-4.1",
+                max_loops=1,
+            )
+            agent2.load(state_path)
+
+            # Verify state loaded correctly
+            history2 = agent2.short_memory.return_history_as_string()
+            assert (
+                "Test message 1" in history2
+            ), "State not loaded correctly"
+            assert (
+                "Test message 2" in history2
+            ), "Memory not loaded correctly"
+
+            # Test autosave functionality
+            agent3 = Agent(
+                agent_name="Autosave-Test-Agent",
+                model_name="gpt-4.1",
+                max_loops=1,
+                saved_state_path=os.path.join(
+                    temp_dir, "autosave_state.json"
+                ),
+                autosave=True,
+            )
+
+            agent3.run("Test autosave")
+            time.sleep(2)  # Wait for autosave
+            assert os.path.exists(
+                os.path.join(temp_dir, "autosave_state.json")
+            ), "Autosave file not created"
+
+        print("✓ State management test passed")
+
+    def test_agent_tools_and_execution(self):
+        """Test agent tool handling and execution"""
+        print("\nTesting tools and execution...")
+
+        def sample_tool(x: int, y: int) -> int:
+            """Sample tool that adds two numbers"""
+            return x + y
+
+        agent = Agent(
+            agent_name="Tools-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+            tools=[sample_tool],
+        )
+
+        # Test adding tools
+        agent.add_tool(lambda x: x * 2)
+        assert len(agent.tools) == 2, "Tool not added correctly"
+
+        # Test removing tools
+        agent.remove_tool(sample_tool)
+        assert len(agent.tools) == 1, "Tool not removed correctly"
+
+        # Test tool execution
+        response = agent.run("Calculate 2 + 2 using the sample tool")
+        assert response is not None, "Tool execution failed"
+
+        print("✓ Tools and execution test passed")
+
+    def test_agent_concurrent_execution(self):
+        """Test agent concurrent execution capabilities"""
+        print("\nTesting concurrent execution...")
+
+        agent = Agent(
+            agent_name="Concurrent-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+        )
+
+        # Test bulk run
+        tasks = [
+            {"task": "Count to 3"},
+            {"task": "Say hello"},
+            {"task": "Tell a short joke"},
+        ]
+
+        responses = agent.bulk_run(tasks)
+        assert len(responses) == len(tasks), "Not all tasks completed"
+        assert all(
+            response is not None for response in responses
+        ), "Some tasks failed"
+
+        # Test concurrent tasks
+        concurrent_responses = agent.run_concurrent_tasks(
+            ["Task 1", "Task 2", "Task 3"]
+        )
+        assert (
+            len(concurrent_responses) == 3
+        ), "Not all concurrent tasks completed"
+
+        print("✓ Concurrent execution test passed")
+
+    def test_agent_error_handling(self):
+        """Test agent error handling and recovery"""
+        print("\nTesting error handling...")
+
+        agent = Agent(
+            agent_name="Error-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+            retry_attempts=3,
+            retry_interval=1,
+        )
+
+        # Test invalid tool execution
+        try:
+            agent.parse_and_execute_tools("invalid_json")
+            print("✓ Invalid tool execution handled")
+        except Exception:
+            assert True, "Expected error caught"
+
+        # Test recovery after error
+        response = agent.run("Continue after error")
+        assert (
+            response is not None
+        ), "Agent failed to recover after error"
+
+        print("✓ Error handling test passed")
+
+    def test_agent_configuration(self):
+        """Test agent configuration and parameters"""
+        print("\nTesting agent configuration...")
+
+        agent = Agent(
+            agent_name="Config-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+            temperature=0.7,
+            max_tokens=4000,
+            context_length=8192,
+        )
+
+        # Test configuration methods
+        agent.update_system_prompt("New system prompt")
+        agent.update_max_loops(2)
+        agent.update_loop_interval(2)
+
+        # Verify updates
+        assert agent.max_loops == 2, "Max loops not updated"
+        assert agent.loop_interval == 2, "Loop interval not updated"
+
+        # Test configuration export
+        config_dict = agent.to_dict()
+        assert isinstance(
+            config_dict, dict
+        ), "Configuration export failed"
+
+        # Test YAML export
+        yaml_config = agent.to_yaml()
+        assert isinstance(yaml_config, str), "YAML export failed"
+
+        print("✓ Configuration test passed")
+
+    def test_agent_with_stopping_condition(self):
+        """Test agent with custom stopping condition"""
+        print("\nTesting agent with stopping condition...")
+
+        def custom_stopping_condition(response: str) -> bool:
+            return "STOP" in response.upper()
+
+        agent = Agent(
+            agent_name="Stopping-Condition-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+            stopping_condition=custom_stopping_condition,
+        )
+
+        response = agent.run("Count up until you see the word STOP")
+        assert response is not None, "Stopping condition test failed"
+        print("✓ Stopping condition test passed")
+
+    def test_agent_with_retry_mechanism(self):
+        """Test agent retry mechanism"""
+        print("\nTesting agent retry mechanism...")
+
+        agent = Agent(
+            agent_name="Retry-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+            retry_attempts=3,
+            retry_interval=1,
+        )
+
+        response = agent.run("Tell me a joke.")
+        assert response is not None, "Retry mechanism test failed"
+        print("✓ Retry mechanism test passed")
+
+    def test_bulk_and_filtered_operations(self):
+        """Test bulk operations and response filtering"""
+        print("\nTesting bulk and filtered operations...")
+
+        agent = Agent(
+            agent_name="Bulk-Filter-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+        )
+
+        # Test bulk run
+        bulk_tasks = [
+            {"task": "What is 2+2?"},
+            {"task": "Name a color"},
+            {"task": "Count to 3"},
+        ]
+        bulk_responses = agent.bulk_run(bulk_tasks)
+        assert len(bulk_responses) == len(
+            bulk_tasks
+        ), "Bulk run should return same number of responses as tasks"
+
+        # Test response filtering
+        agent.add_response_filter("color")
+        filtered_response = agent.filtered_run(
+            "What is your favorite color?"
+        )
+        assert (
+            "[FILTERED]" in filtered_response
+        ), "Response filter not applied"
+
+        print("✓ Bulk and filtered operations test passed")
+
+    async def test_async_operations(self):
+        """Test asynchronous operations"""
+        print("\nTesting async operations...")
+
+        agent = Agent(
+            agent_name="Async-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+        )
+
+        # Test single async run
+        response = await agent.arun("What is 1+1?")
+        assert response is not None, "Async run failed"
+
+        # Test concurrent async runs
+        tasks = ["Task 1", "Task 2", "Task 3"]
+        responses = await asyncio.gather(
+            *[agent.arun(task) for task in tasks]
+        )
+        assert len(responses) == len(
+            tasks
+        ), "Not all async tasks completed"
+
+        print("✓ Async operations test passed")
+
+    def test_memory_and_state_persistence(self):
+        """Test memory management and state persistence"""
+        print("\nTesting memory and state persistence...")
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            state_path = os.path.join(temp_dir, "test_state.json")
+
+            # Create agent with memory configuration
+            agent1 = Agent(
+                agent_name="Memory-State-Test-Agent",
+                model_name="gpt-4.1",
+                max_loops=1,
+                saved_state_path=state_path,
+                context_length=8192,
+                autosave=True,
+            )
+
+            # Test memory operations
+            agent1.add_memory("Important fact: The sky is blue")
+            agent1.memory_query("What color is the sky?")
+
+            # Save state
+            agent1.save()
+
+            # Create new agent and load state
+            agent2 = Agent(
+                agent_name="Memory-State-Test-Agent",
+                model_name="gpt-4.1",
+                max_loops=1,
+            )
+            agent2.load(state_path)
+
+            # Verify memory persistence
+            memory_content = (
+                agent2.short_memory.return_history_as_string()
+            )
+            assert (
+                "sky is blue" in memory_content
+            ), "Memory not properly persisted"
+
+            print("✓ Memory and state persistence test passed")
+
+    def test_sentiment_and_evaluation(self):
+        """Test sentiment analysis and response evaluation"""
+        print("\nTesting sentiment analysis and evaluation...")
+
+        def mock_sentiment_analyzer(text):
+            """Mock sentiment analyzer that returns a score between 0 and 1"""
+            return 0.7 if "positive" in text.lower() else 0.3
+
+        def mock_evaluator(response):
+            """Mock evaluator that checks response quality"""
+            return "GOOD" if len(response) > 10 else "BAD"
+
+        agent = Agent(
+            agent_name="Sentiment-Eval-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+            sentiment_analyzer=mock_sentiment_analyzer,
+            sentiment_threshold=0.5,
+            evaluator=mock_evaluator,
+        )
+
+        # Test sentiment analysis
+        agent.run("Generate a positive message")
+
+        # Test evaluation
+        agent.run("Generate a detailed response")
+
+        print("✓ Sentiment and evaluation test passed")
+
+    def test_tool_management(self):
+        """Test tool management functionality"""
+        print("\nTesting tool management...")
+
+        def tool1(x: int) -> int:
+            """Sample tool 1"""
+            return x * 2
+
+        def tool2(x: int) -> int:
+            """Sample tool 2"""
+            return x + 2
+
+        agent = Agent(
+            agent_name="Tool-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+            tools=[tool1],
+        )
+
+        # Test adding tools
+        agent.add_tool(tool2)
+        assert len(agent.tools) == 2, "Tool not added correctly"
+
+        # Test removing tools
+        agent.remove_tool(tool1)
+        assert len(agent.tools) == 1, "Tool not removed correctly"
+
+        # Test adding multiple tools
+        agent.add_tools([tool1, tool2])
+        assert (
+            len(agent.tools) == 3
+        ), "Multiple tools not added correctly"
+
+        print("✓ Tool management test passed")
+
+    def test_system_prompt_and_configuration(self):
+        """Test system prompt and configuration updates"""
+        print("\nTesting system prompt and configuration...")
+
+        agent = Agent(
+            agent_name="Config-Test-Agent",
+            model_name="gpt-4.1",
+            max_loops=1,
+        )
+
+        # Test updating system prompt
+        new_prompt = "You are a helpful assistant."
+        agent.update_system_prompt(new_prompt)
+        assert (
+            agent.system_prompt == new_prompt
+        ), "System prompt not updated"
+
+        # Test configuration updates
+        agent.update_max_loops(5)
+        assert agent.max_loops == 5, "Max loops not updated"
+
+        agent.update_loop_interval(2)
+        assert agent.loop_interval == 2, "Loop interval not updated"
+
+        # Test configuration export
+        config_dict = agent.to_dict()
+        assert isinstance(
+            config_dict, dict
+        ), "Configuration export failed"
+
+        print("✓ System prompt and configuration test passed")
+
+    def test_agent_with_dynamic_temperature(self):
+        """Test agent with dynamic temperature"""
+        print("\nTesting agent with dynamic temperature...")
+
+        agent = Agent(
+            agent_name="Dynamic-Temp-Agent",
+            model_name="gpt-4.1",
+            max_loops=2,
+            dynamic_temperature_enabled=True,
+        )
+
+        response = agent.run("Generate a creative story.")
+        assert response is not None, "Dynamic temperature test failed"
+        print("✓ Dynamic temperature test passed")
+
+
+# ============================================================================
+# AGENT LOGGING TESTS
+# ============================================================================
+
+
+class TestAgentLogging:
+    """Test agent logging functionality"""
+
+    def setUp(self):
+        """Set up test fixtures"""
+        self.mock_tokenizer = MagicMock()
+        self.mock_tokenizer.count_tokens.return_value = 100
+
+        self.mock_short_memory = MagicMock()
+        self.mock_short_memory.get_memory_stats.return_value = {
+            "message_count": 2
+        }
+
+        self.mock_long_memory = MagicMock()
+        self.mock_long_memory.get_memory_stats.return_value = {
+            "item_count": 5
+        }
+
+        self.agent = Agent(
+            tokenizer=self.mock_tokenizer,
+            short_memory=self.mock_short_memory,
+            long_term_memory=self.mock_long_memory,
+        )
+
+    def test_log_step_metadata_basic(self):
+        """Test basic step metadata logging"""
+        log_result = self.agent.log_step_metadata(
+            1, "Test prompt", "Test response"
+        )
+
+        assert "step_id" in log_result
+        assert "timestamp" in log_result
+        assert "tokens" in log_result
+        assert "memory_usage" in log_result
+
+        assert log_result["tokens"]["total"] == 200
+
+    def test_log_step_metadata_no_long_term_memory(self):
+        """Test step metadata logging without long term memory"""
+        self.agent.long_term_memory = None
+        log_result = self.agent.log_step_metadata(
+            1, "prompt", "response"
+        )
+        assert log_result["memory_usage"]["long_term"] == {}
+
+    def test_log_step_metadata_timestamp(self):
+        """Test step metadata logging timestamp"""
+        log_result = self.agent.log_step_metadata(
+            1, "prompt", "response"
+        )
+        assert "timestamp" in log_result
+
+    def test_token_counting_integration(self):
+        """Test token counting integration"""
+        self.mock_tokenizer.count_tokens.side_effect = [150, 250]
+        log_result = self.agent.log_step_metadata(
+            1, "prompt", "response"
+        )
+
+        assert log_result["tokens"]["total"] == 400
+
+    def test_agent_output_updating(self):
+        """Test agent output updating"""
+        initial_total_tokens = sum(
+            step["tokens"]["total"]
+            for step in self.agent.agent_output.steps
+        )
+        self.agent.log_step_metadata(1, "prompt", "response")
+
+        final_total_tokens = sum(
+            step["tokens"]["total"]
+            for step in self.agent.agent_output.steps
+        )
+        assert final_total_tokens - initial_total_tokens == 200
+        assert len(self.agent.agent_output.steps) == 1
+
+    def test_full_logging_cycle(self):
+        """Test full logging cycle"""
+        agent = Agent(agent_name="test-agent")
+        task = "Test task"
+        max_loops = 1
+
+        result = agent._run(task, max_loops=max_loops)
+
+        assert isinstance(result, dict)
+        assert "steps" in result
+        assert isinstance(result["steps"], list)
+        assert len(result["steps"]) == max_loops
+
+        if result["steps"]:
+            step = result["steps"][0]
+            assert "step_id" in step
+            assert "timestamp" in step
+            assert "task" in step
+            assert "response" in step
+            assert step["task"] == task
+            assert step["response"] == "Response for loop 1"
+
+        assert len(self.agent.agent_output.steps) > 0
+
+
+# ============================================================================
+# YAML AGENT CREATION TESTS
+# ============================================================================
+
+
+class TestCreateAgentsFromYaml:
+    """Test YAML agent creation functionality"""
+
+    def setUp(self):
+        """Set up test fixtures"""
+        # Mock the environment variable for API key
+        os.environ["OPENAI_API_KEY"] = "fake-api-key"
+
+        # Mock agent configuration YAML content
+        self.valid_yaml_content = """
+        agents:
+          - agent_name: "Financial-Analysis-Agent"
+            model:
+              openai_api_key: "fake-api-key"
+              model_name: "gpt-4o-mini"
+              temperature: 0.1
+              max_tokens: 2000
+            system_prompt: "financial_agent_sys_prompt"
+            max_loops: 1
+            autosave: true
+            dashboard: false
+            verbose: true
+            dynamic_temperature_enabled: true
+            saved_state_path: "finance_agent.json"
+            user_name: "swarms_corp"
+            retry_attempts: 1
+            context_length: 200000
+            return_step_meta: false
+            output_type: "str"
+            task: "How can I establish a ROTH IRA to buy stocks and get a tax break?"
+        """
+
+    @patch(
+        "builtins.open",
+        new_callable=unittest.mock.mock_open,
+        read_data="",
     )
-    assert (
-        len(response) > flow_instance.get_response_length_threshold()
+    @patch("yaml.safe_load")
+    def test_create_agents_return_agents(
+        self, mock_safe_load, mock_open
+    ):
+        """Test creating agents from YAML and returning agents"""
+        # Mock YAML content parsing
+        mock_safe_load.return_value = {
+            "agents": [
+                {
+                    "agent_name": "Financial-Analysis-Agent",
+                    "model": {
+                        "openai_api_key": "fake-api-key",
+                        "model_name": "gpt-4o-mini",
+                        "temperature": 0.1,
+                        "max_tokens": 2000,
+                    },
+                    "system_prompt": "financial_agent_sys_prompt",
+                    "max_loops": 1,
+                    "autosave": True,
+                    "dashboard": False,
+                    "verbose": True,
+                    "dynamic_temperature_enabled": True,
+                    "saved_state_path": "finance_agent.json",
+                    "user_name": "swarms_corp",
+                    "retry_attempts": 1,
+                    "context_length": 200000,
+                    "return_step_meta": False,
+                    "output_type": "str",
+                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
+                }
+            ]
+        }
+
+        # Test if agents are returned correctly
+        agents = create_agents_from_yaml(
+            "fake_yaml_path.yaml", return_type="agents"
+        )
+        assert len(agents) == 1
+        assert agents[0].agent_name == "Financial-Analysis-Agent"
+
+    @patch(
+        "builtins.open",
+        new_callable=unittest.mock.mock_open,
+        read_data="",
     )
-
-
-def test_flow_set_response_length_threshold(flow_instance):
-    # Test setting and getting the response length threshold
-    flow_instance.set_response_length_threshold(100)
-    assert flow_instance.get_response_length_threshold() == 100
-
-
-def test_flow_add_custom_filter(flow_instance):
-    # Test adding a custom response filter
-    flow_instance.add_response_filter("custom_filter")
-    assert "custom_filter" in flow_instance.get_response_filters()
-
-
-def test_flow_remove_custom_filter(flow_instance):
-    # Test removing a custom response filter
-    flow_instance.add_response_filter("custom_filter")
-    flow_instance.remove_response_filter("custom_filter")
-    assert "custom_filter" not in flow_instance.get_response_filters()
-
-
-def test_flow_dynamic_pacing(flow_instance):
-    # Test dynamic pacing
-    flow_instance.enable_dynamic_pacing()
-    assert flow_instance.is_dynamic_pacing_enabled() is True
-
-
-def test_flow_disable_dynamic_pacing(flow_instance):
-    # Test disabling dynamic pacing
-    flow_instance.disable_dynamic_pacing()
-    assert flow_instance.is_dynamic_pacing_enabled() is False
-
-
-def test_flow_change_prompt(flow_instance):
-    # Test changing the current prompt
-    flow_instance.change_prompt("New prompt")
-    assert flow_instance.get_current_prompt() == "New prompt"
-
-
-def test_flow_add_instruction(flow_instance):
-    # Test adding an instruction to the conversation
-    flow_instance.add_instruction("Follow these steps:")
-    assert "Follow these steps:" in flow_instance.get_instructions()
-
-
-def test_flow_clear_instructions(flow_instance):
-    # Test clearing all instructions from the conversation
-    flow_instance.add_instruction("Follow these steps:")
-    flow_instance.clear_instructions()
-    assert len(flow_instance.get_instructions()) == 0
-
-
-def test_flow_add_user_message(flow_instance):
-    # Test adding a user message to the conversation
-    flow_instance.add_user_message("User message")
-    assert "User message" in flow_instance.get_user_messages()
-
-
-def test_flow_clear_user_messages(flow_instance):
-    # Test clearing all user messages from the conversation
-    flow_instance.add_user_message("User message")
-    flow_instance.clear_user_messages()
-    assert len(flow_instance.get_user_messages()) == 0
-
-
-def test_flow_get_response_history(flow_instance):
-    # Test getting the response history
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    history = flow_instance.get_response_history()
-    assert len(history) == 2
-    assert "Message 1" in history[0]
-    assert "Message 2" in history[1]
-
-
-def test_flow_clear_response_history(flow_instance):
-    # Test clearing the response history
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    flow_instance.clear_response_history()
-    assert len(flow_instance.get_response_history()) == 0
-
-
-def test_flow_get_conversation_log(flow_instance):
-    # Test getting the entire conversation log
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    conversation_log = flow_instance.get_conversation_log()
-    assert (
-        len(conversation_log) == 4
-    )  # Including system and user messages
-
-
-def test_flow_clear_conversation_log(flow_instance):
-    # Test clearing the entire conversation log
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    flow_instance.clear_conversation_log()
-    assert len(flow_instance.get_conversation_log()) == 0
-
-
-def test_flow_get_state(flow_instance):
-    # Test getting the current state of the Agent instance
-    state = flow_instance.get_state()
-    assert isinstance(state, dict)
-    assert "current_prompt" in state
-    assert "instructions" in state
-    assert "user_messages" in state
-    assert "response_history" in state
-    assert "conversation_log" in state
-    assert "dynamic_pacing_enabled" in state
-    assert "response_length_threshold" in state
-    assert "response_filters" in state
-    assert "max_loops" in state
-    assert "autosave_path" in state
-
-
-def test_flow_load_state(flow_instance):
-    # Test loading the state into the Agent instance
-    state = {
-        "current_prompt": "Loaded prompt",
-        "instructions": ["Step 1", "Step 2"],
-        "user_messages": ["User message 1", "User message 2"],
-        "response_history": ["Response 1", "Response 2"],
-        "conversation_log": [
-            "System message 1",
-            "User message 1",
-            "System message 2",
-            "User message 2",
-        ],
-        "dynamic_pacing_enabled": True,
-        "response_length_threshold": 50,
-        "response_filters": ["filter1", "filter2"],
-        "max_loops": 10,
-        "autosave_path": "/path/to/load",
-    }
-    flow_instance.load(state)
-    assert flow_instance.get_current_prompt() == "Loaded prompt"
-    assert "Step 1" in flow_instance.get_instructions()
-    assert "User message 1" in flow_instance.get_user_messages()
-    assert "Response 1" in flow_instance.get_response_history()
-    assert "System message 1" in flow_instance.get_conversation_log()
-    assert flow_instance.is_dynamic_pacing_enabled() is True
-    assert flow_instance.get_response_length_threshold() == 50
-    assert "filter1" in flow_instance.get_response_filters()
-    assert flow_instance.get_max_loops() == 10
-    assert flow_instance.get_autosave_path() == "/path/to/load"
-
-
-def test_flow_save_state(flow_instance):
-    # Test saving the state of the Agent instance
-    flow_instance.change_prompt("New prompt")
-    flow_instance.add_instruction("Step 1")
-    flow_instance.add_user_message("User message")
-    flow_instance.run("Response")
-    state = flow_instance.save_state()
-    assert "current_prompt" in state
-    assert "instructions" in state
-    assert "user_messages" in state
-    assert "response_history" in state
-    assert "conversation_log" in state
-    assert "dynamic_pacing_enabled" in state
-    assert "response_length_threshold" in state
-    assert "response_filters" in state
-    assert "max_loops" in state
-    assert "autosave_path" in state
-
-
-def test_flow_rollback(flow_instance):
-    # Test rolling back to a previous state
-    state1 = flow_instance.get_state()
-    flow_instance.change_prompt("New prompt")
-    flow_instance.get_state()
-    flow_instance.rollback_to_state(state1)
-    assert (
-        flow_instance.get_current_prompt() == state1["current_prompt"]
-    )
-    assert flow_instance.get_instructions() == state1["instructions"]
-    assert (
-        flow_instance.get_user_messages() == state1["user_messages"]
-    )
-    assert (
-        flow_instance.get_response_history()
-        == state1["response_history"]
+    @patch("yaml.safe_load")
+    @patch(
+        "swarms.Agent.run", return_value="Task completed successfully"
     )
-    assert (
-        flow_instance.get_conversation_log()
-        == state1["conversation_log"]
+    def test_create_agents_return_tasks(
+        self, mock_agent_run, mock_safe_load, mock_open
+    ):
+        """Test creating agents from YAML and returning task results"""
+        # Mock YAML content parsing
+        mock_safe_load.return_value = {
+            "agents": [
+                {
+                    "agent_name": "Financial-Analysis-Agent",
+                    "model": {
+                        "openai_api_key": "fake-api-key",
+                        "model_name": "gpt-4o-mini",
+                        "temperature": 0.1,
+                        "max_tokens": 2000,
+                    },
+                    "system_prompt": "financial_agent_sys_prompt",
+                    "max_loops": 1,
+                    "autosave": True,
+                    "dashboard": False,
+                    "verbose": True,
+                    "dynamic_temperature_enabled": True,
+                    "saved_state_path": "finance_agent.json",
+                    "user_name": "swarms_corp",
+                    "retry_attempts": 1,
+                    "context_length": 200000,
+                    "return_step_meta": False,
+                    "output_type": "str",
+                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
+                }
+            ]
+        }
+
+        # Test if tasks are executed and results are returned
+        task_results = create_agents_from_yaml(
+            "fake_yaml_path.yaml", return_type="tasks"
+        )
+        assert len(task_results) == 1
+        assert (
+            task_results[0]["agent_name"]
+            == "Financial-Analysis-Agent"
+        )
+        assert task_results[0]["output"] is not None
+
+    @patch(
+        "builtins.open",
+        new_callable=unittest.mock.mock_open,
+        read_data="",
     )
-    assert (
-        flow_instance.is_dynamic_pacing_enabled()
-        == state1["dynamic_pacing_enabled"]
+    @patch("yaml.safe_load")
+    def test_create_agents_return_both(
+        self, mock_safe_load, mock_open
+    ):
+        """Test creating agents from YAML and returning both agents and tasks"""
+        # Mock YAML content parsing
+        mock_safe_load.return_value = {
+            "agents": [
+                {
+                    "agent_name": "Financial-Analysis-Agent",
+                    "model": {
+                        "openai_api_key": "fake-api-key",
+                        "model_name": "gpt-4o-mini",
+                        "temperature": 0.1,
+                        "max_tokens": 2000,
+                    },
+                    "system_prompt": "financial_agent_sys_prompt",
+                    "max_loops": 1,
+                    "autosave": True,
+                    "dashboard": False,
+                    "verbose": True,
+                    "dynamic_temperature_enabled": True,
+                    "saved_state_path": "finance_agent.json",
+                    "user_name": "swarms_corp",
+                    "retry_attempts": 1,
+                    "context_length": 200000,
+                    "return_step_meta": False,
+                    "output_type": "str",
+                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
+                }
+            ]
+        }
+
+        # Test if both agents and tasks are returned
+        agents, task_results = create_agents_from_yaml(
+            "fake_yaml_path.yaml", return_type="both"
+        )
+        assert len(agents) == 1
+        assert len(task_results) == 1
+        assert agents[0].agent_name == "Financial-Analysis-Agent"
+        assert task_results[0]["output"] is not None
+
+    @patch(
+        "builtins.open",
+        new_callable=unittest.mock.mock_open,
+        read_data="",
     )
-    assert (
-        flow_instance.get_response_length_threshold()
-        == state1["response_length_threshold"]
+    @patch("yaml.safe_load")
+    def test_missing_agents_in_yaml(self, mock_safe_load, mock_open):
+        """Test handling missing agents in YAML"""
+        # Mock YAML content with missing "agents" key
+        mock_safe_load.return_value = {}
+
+        # Test if the function raises an error for missing "agents" key
+        with pytest.raises(ValueError) as context:
+            create_agents_from_yaml(
+                "fake_yaml_path.yaml", return_type="agents"
+            )
+        assert (
+            "The YAML configuration does not contain 'agents'."
+            in str(context.exception)
+        )
+
+    @patch(
+        "builtins.open",
+        new_callable=unittest.mock.mock_open,
+        read_data="",
     )
-    assert (
-        flow_instance.get_response_filters()
-        == state1["response_filters"]
-    )
-    assert flow_instance.get_max_loops() == state1["max_loops"]
-    assert (
-        flow_instance.get_autosave_path() == state1["autosave_path"]
-    )
-    assert flow_instance.get_state() == state1
-
-
-def test_flow_contextual_intent(flow_instance):
-    # Test contextual intent handling
-    flow_instance.add_context("location", "New York")
-    flow_instance.add_context("time", "tomorrow")
-    response = flow_instance.run(
-        "What's the weather like in {location} at {time}?"
-    )
-    assert "New York" in response
-    assert "tomorrow" in response
-
-
-def test_flow_contextual_intent_override(flow_instance):
-    # Test contextual intent override
-    flow_instance.add_context("location", "New York")
-    response1 = flow_instance.run(
-        "What's the weather like in {location}?"
-    )
-    flow_instance.add_context("location", "Los Angeles")
-    response2 = flow_instance.run(
-        "What's the weather like in {location}?"
-    )
-    assert "New York" in response1
-    assert "Los Angeles" in response2
-
-
-def test_flow_contextual_intent_reset(flow_instance):
-    # Test resetting contextual intent
-    flow_instance.add_context("location", "New York")
-    response1 = flow_instance.run(
-        "What's the weather like in {location}?"
-    )
-    flow_instance.reset_context()
-    response2 = flow_instance.run(
-        "What's the weather like in {location}?"
-    )
-    assert "New York" in response1
-    assert "New York" in response2
-
-
-# Add more test cases as needed to cover various aspects of your Agent class
-def test_flow_interruptible(flow_instance):
-    # Test interruptible mode
-    flow_instance.interruptible = True
-    response = flow_instance.run("Interrupt me!")
-    assert "Interrupted" in response
-    assert flow_instance.is_interrupted() is True
-
-
-def test_flow_non_interruptible(flow_instance):
-    # Test non-interruptible mode
-    flow_instance.interruptible = False
-    response = flow_instance.run("Do not interrupt me!")
-    assert "Do not interrupt me!" in response
-    assert flow_instance.is_interrupted() is False
-
-
-def test_flow_timeout(flow_instance):
-    # Test conversation timeout
-    flow_instance.timeout = 60  # Set a timeout of 60 seconds
-    response = flow_instance.run(
-        "This should take some time to respond."
-    )
-    assert "Timed out" in response
-    assert flow_instance.is_timed_out() is True
-
-
-def test_flow_no_timeout(flow_instance):
-    # Test no conversation timeout
-    flow_instance.timeout = None
-    response = flow_instance.run("This should not time out.")
-    assert "This should not time out." in response
-    assert flow_instance.is_timed_out() is False
-
-
-def test_flow_custom_delimiter(flow_instance):
-    # Test setting and getting a custom message delimiter
-    flow_instance.set_message_delimiter("|||")
-    assert flow_instance.get_message_delimiter() == "|||"
-
-
-def test_flow_message_history(flow_instance):
-    # Test getting the message history
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    history = flow_instance.get_message_history()
-    assert len(history) == 2
-    assert "Message 1" in history[0]
-    assert "Message 2" in history[1]
-
-
-def test_flow_clear_message_history(flow_instance):
-    # Test clearing the message history
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    flow_instance.clear_message_history()
-    assert len(flow_instance.get_message_history()) == 0
-
-
-def test_flow_save_and_load_conversation(flow_instance):
-    # Test saving and loading the conversation
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    saved_conversation = flow_instance.save_conversation()
-    flow_instance.clear_conversation()
-    flow_instance.load_conversation(saved_conversation)
-    assert len(flow_instance.get_message_history()) == 2
-
-
-def test_flow_inject_custom_system_message(flow_instance):
-    # Test injecting a custom system message into the conversation
-    flow_instance.inject_custom_system_message(
-        "Custom system message"
-    )
-    assert (
-        "Custom system message" in flow_instance.get_message_history()
-    )
-
-
-def test_flow_inject_custom_user_message(flow_instance):
-    # Test injecting a custom user message into the conversation
-    flow_instance.inject_custom_user_message("Custom user message")
-    assert (
-        "Custom user message" in flow_instance.get_message_history()
-    )
-
-
-def test_flow_inject_custom_response(flow_instance):
-    # Test injecting a custom response into the conversation
-    flow_instance.inject_custom_response("Custom response")
-    assert "Custom response" in flow_instance.get_message_history()
-
-
-def test_flow_clear_injected_messages(flow_instance):
-    # Test clearing injected messages from the conversation
-    flow_instance.inject_custom_system_message(
-        "Custom system message"
-    )
-    flow_instance.inject_custom_user_message("Custom user message")
-    flow_instance.inject_custom_response("Custom response")
-    flow_instance.clear_injected_messages()
-    assert (
-        "Custom system message"
-        not in flow_instance.get_message_history()
-    )
-    assert (
-        "Custom user message"
-        not in flow_instance.get_message_history()
-    )
-    assert (
-        "Custom response" not in flow_instance.get_message_history()
-    )
-
-
-def test_flow_disable_message_history(flow_instance):
-    # Test disabling message history recording
-    flow_instance.disable_message_history()
-    response = flow_instance.run(
-        "This message should not be recorded in history."
-    )
-    assert (
-        "This message should not be recorded in history." in response
-    )
-    assert (
-        len(flow_instance.get_message_history()) == 0
-    )  # History is empty
-
-
-def test_flow_enable_message_history(flow_instance):
-    # Test enabling message history recording
-    flow_instance.enable_message_history()
-    response = flow_instance.run(
-        "This message should be recorded in history."
-    )
-    assert "This message should be recorded in history." in response
-    assert len(flow_instance.get_message_history()) == 1
-
-
-def test_flow_custom_logger(flow_instance):
-    # Test setting and using a custom logger
-    custom_logger = logger  # Replace with your custom logger class
-    flow_instance.set_logger(custom_logger)
-    response = flow_instance.run("Custom logger test")
-    assert (
-        "Logged using custom logger" in response
-    )  # Verify logging message
-
-
-def test_flow_batch_processing(flow_instance):
-    # Test batch processing of messages
-    messages = ["Message 1", "Message 2", "Message 3"]
-    responses = flow_instance.process_batch(messages)
-    assert isinstance(responses, list)
-    assert len(responses) == len(messages)
-    for response in responses:
-        assert isinstance(response, str)
-
-
-def test_flow_custom_metrics(flow_instance):
-    # Test tracking custom metrics
-    flow_instance.track_custom_metric("custom_metric_1", 42)
-    flow_instance.track_custom_metric("custom_metric_2", 3.14)
-    metrics = flow_instance.get_custom_metrics()
-    assert "custom_metric_1" in metrics
-    assert "custom_metric_2" in metrics
-    assert metrics["custom_metric_1"] == 42
-    assert metrics["custom_metric_2"] == 3.14
-
-
-def test_flow_reset_metrics(flow_instance):
-    # Test resetting custom metrics
-    flow_instance.track_custom_metric("custom_metric_1", 42)
-    flow_instance.track_custom_metric("custom_metric_2", 3.14)
-    flow_instance.reset_custom_metrics()
-    metrics = flow_instance.get_custom_metrics()
-    assert len(metrics) == 0
-
-
-def test_flow_retrieve_context(flow_instance):
-    # Test retrieving context
-    flow_instance.add_context("location", "New York")
-    context = flow_instance.get_context("location")
-    assert context == "New York"
-
-
-def test_flow_update_context(flow_instance):
-    # Test updating context
-    flow_instance.add_context("location", "New York")
-    flow_instance.update_context("location", "Los Angeles")
-    context = flow_instance.get_context("location")
-    assert context == "Los Angeles"
-
-
-def test_flow_remove_context(flow_instance):
-    # Test removing context
-    flow_instance.add_context("location", "New York")
-    flow_instance.remove_context("location")
-    context = flow_instance.get_context("location")
-    assert context is None
-
-
-def test_flow_clear_context(flow_instance):
-    # Test clearing all context
-    flow_instance.add_context("location", "New York")
-    flow_instance.add_context("time", "tomorrow")
-    flow_instance.clear_context()
-    context_location = flow_instance.get_context("location")
-    context_time = flow_instance.get_context("time")
-    assert context_location is None
-    assert context_time is None
-
-
-def test_flow_input_validation(flow_instance):
-    # Test input validation for invalid agent configurations
-    with pytest.raises(ValueError):
-        Agent(config=None)  # Invalid config, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.set_message_delimiter(
-            ""
-        )  # Empty delimiter, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.set_message_delimiter(
-            None
-        )  # None delimiter, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.set_message_delimiter(
-            123
-        )  # Invalid delimiter type, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.set_logger(
-            "invalid_logger"
-        )  # Invalid logger type, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.add_context(
-            None, "value"
-        )  # None key, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.add_context(
-            "key", None
-        )  # None value, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.update_context(
-            None, "value"
-        )  # None key, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.update_context(
-            "key", None
-        )  # None value, should raise ValueError
-
-
-def test_flow_conversation_reset(flow_instance):
-    # Test conversation reset
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    flow_instance.reset_conversation()
-    assert len(flow_instance.get_message_history()) == 0
-
-
-def test_flow_conversation_persistence(flow_instance):
-    # Test conversation persistence across instances
-    flow_instance.run("Message 1")
-    flow_instance.run("Message 2")
-    conversation = flow_instance.get_conversation()
-
-    new_flow_instance = Agent()
-    new_flow_instance.load_conversation(conversation)
-    assert len(new_flow_instance.get_message_history()) == 2
-    assert "Message 1" in new_flow_instance.get_message_history()[0]
-    assert "Message 2" in new_flow_instance.get_message_history()[1]
-
-
-def test_flow_custom_event_listener(flow_instance):
-    # Test custom event listener
-    class CustomEventListener:
-        def on_message_received(self, message):
+    @patch("yaml.safe_load")
+    def test_invalid_return_type(self, mock_safe_load, mock_open):
+        """Test handling invalid return type"""
+        # Mock YAML content parsing
+        mock_safe_load.return_value = {
+            "agents": [
+                {
+                    "agent_name": "Financial-Analysis-Agent",
+                    "model": {
+                        "openai_api_key": "fake-api-key",
+                        "model_name": "gpt-4o-mini",
+                        "temperature": 0.1,
+                        "max_tokens": 2000,
+                    },
+                    "system_prompt": "financial_agent_sys_prompt",
+                    "max_loops": 1,
+                    "autosave": True,
+                    "dashboard": False,
+                    "verbose": True,
+                    "dynamic_temperature_enabled": True,
+                    "saved_state_path": "finance_agent.json",
+                    "user_name": "swarms_corp",
+                    "retry_attempts": 1,
+                    "context_length": 200000,
+                    "return_step_meta": False,
+                    "output_type": "str",
+                    "task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
+                }
+            ]
+        }
+
+        # Test if an error is raised for invalid return_type
+        with pytest.raises(ValueError) as context:
+            create_agents_from_yaml(
+                "fake_yaml_path.yaml", return_type="invalid_type"
+            )
+        assert "Invalid return_type" in str(context.exception)
+
+
+# ============================================================================
+# BENCHMARK TESTS
+# ============================================================================
+
+
+class TestAgentBenchmark:
+    """Test agent benchmarking functionality"""
+
+    def test_benchmark_multiple_agents(self):
+        """Test benchmarking multiple agents"""
+        console = Console()
+        init_times = []
+        memory_readings = []
+        process = psutil.Process(os.getpid())
+
+        # Create benchmark tables
+        time_table = Table(title="Time Statistics")
+        time_table.add_column("Metric", style="cyan")
+        time_table.add_column("Value", style="green")
+
+        memory_table = Table(title="Memory Statistics")
+        memory_table.add_column("Metric", style="cyan")
+        memory_table.add_column("Value", style="green")
+
+        initial_memory = process.memory_info().rss / 1024
+        start_total_time = time.perf_counter()
+
+        # Initialize agents and measure performance
+        num_agents = 10  # Reduced for testing
+        for i in range(num_agents):
+            start_time = time.perf_counter()
+
+            Agent(
+                agent_name=f"Financial-Analysis-Agent-{i}",
+                agent_description="Personal finance advisor agent",
+                max_loops=2,
+                model_name="gpt-4o-mini",
+                dynamic_temperature_enabled=True,
+                interactive=False,
+            )
+
+            init_time = (time.perf_counter() - start_time) * 1000
+            init_times.append(init_time)
+
+            current_memory = process.memory_info().rss / 1024
+            memory_readings.append(current_memory - initial_memory)
+
+            if (i + 1) % 5 == 0:
+                console.print(
+                    f"Created {i + 1} agents...", style="bold blue"
+                )
+
+        (time.perf_counter() - start_total_time) * 1000
+
+        # Calculate statistics
+        time_stats = self._get_time_stats(init_times)
+        memory_stats = self._get_memory_stats(memory_readings)
+
+        # Verify basic statistics
+        assert len(init_times) == num_agents
+        assert len(memory_readings) == num_agents
+        assert time_stats["mean"] > 0
+        assert memory_stats["mean"] >= 0
+
+        print("✓ Benchmark test passed")
+
+    def _get_memory_stats(self, memory_readings):
+        """Calculate memory statistics"""
+        return {
+            "peak": max(memory_readings) if memory_readings else 0,
+            "min": min(memory_readings) if memory_readings else 0,
+            "mean": mean(memory_readings) if memory_readings else 0,
+            "median": (
+                median(memory_readings) if memory_readings else 0
+            ),
+            "stdev": (
+                stdev(memory_readings)
+                if len(memory_readings) > 1
+                else 0
+            ),
+            "variance": (
+                variance(memory_readings)
+                if len(memory_readings) > 1
+                else 0
+            ),
+        }
+
+    def _get_time_stats(self, times):
+        """Calculate time statistics"""
+        return {
+            "total": sum(times),
+            "mean": mean(times) if times else 0,
+            "median": median(times) if times else 0,
+            "min": min(times) if times else 0,
+            "max": max(times) if times else 0,
+            "stdev": stdev(times) if len(times) > 1 else 0,
+            "variance": variance(times) if len(times) > 1 else 0,
+        }
+
+
+# ============================================================================
+# TOOL USAGE TESTS
+# ============================================================================
+
+
+class TestAgentToolUsage:
+    """Test comprehensive tool usage functionality for agents"""
+
+    def test_normal_callable_tools(self):
+        """Test normal callable tools (functions, lambdas, methods)"""
+        print("\nTesting normal callable tools...")
+
+        def math_tool(x: int, y: int) -> int:
+            """Add two numbers together"""
+            return x + y
+
+        def string_tool(text: str) -> str:
+            """Convert text to uppercase"""
+            return text.upper()
+
+        def list_tool(items: list) -> int:
+            """Count items in a list"""
+            return len(items)
+
+        # Test with individual function tools
+        agent = Agent(
+            agent_name="Callable-Tools-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[math_tool, string_tool, list_tool],
+        )
+
+        # Test tool addition
+        assert len(agent.tools) == 3, "Tools not added correctly"
+
+        # Test tool execution
+        response = agent.run("Use the math tool to add 5 and 3")
+        assert response is not None, "Tool execution failed"
+
+        # Test lambda tools
+        def lambda_tool(x):
+            return x * 2
+
+        agent.add_tool(lambda_tool)
+        assert (
+            len(agent.tools) == 4
+        ), "Lambda tool not added correctly"
+
+        # Test method tools
+        class MathOperations:
+            def multiply(self, x: int, y: int) -> int:
+                """Multiply two numbers"""
+                return x * y
+
+        math_ops = MathOperations()
+        agent.add_tool(math_ops.multiply)
+        assert (
+            len(agent.tools) == 5
+        ), "Method tool not added correctly"
+
+        print("✓ Normal callable tools test passed")
+
+    def test_tool_management_operations(self):
+        """Test tool management operations (add, remove, list)"""
+        print("\nTesting tool management operations...")
+
+        def tool1(x: int) -> int:
+            """Tool 1"""
+            return x + 1
+
+        def tool2(x: int) -> int:
+            """Tool 2"""
+            return x * 2
+
+        def tool3(x: int) -> int:
+            """Tool 3"""
+            return x - 1
+
+        agent = Agent(
+            agent_name="Tool-Management-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[tool1, tool2],
+        )
+
+        # Test initial tools
+        assert (
+            len(agent.tools) == 2
+        ), "Initial tools not set correctly"
+
+        # Test adding single tool
+        agent.add_tool(tool3)
+        assert len(agent.tools) == 3, "Single tool addition failed"
+
+        # Test adding multiple tools
+        def tool4(x: int) -> int:
+            return x**2
+
+        def tool5(x: int) -> int:
+            return x // 2
+
+        agent.add_tools([tool4, tool5])
+        assert len(agent.tools) == 5, "Multiple tools addition failed"
+
+        # Test removing single tool
+        agent.remove_tool(tool1)
+        assert len(agent.tools) == 4, "Single tool removal failed"
+
+        # Test removing multiple tools
+        agent.remove_tools([tool2, tool3])
+        assert len(agent.tools) == 2, "Multiple tools removal failed"
+
+        print("✓ Tool management operations test passed")
+
+    def test_mcp_single_url_tools(self):
+        """Test MCP single URL tools"""
+        print("\nTesting MCP single URL tools...")
+
+        # Mock MCP URL for testing
+        mock_mcp_url = "http://localhost:8000/mcp"
+
+        with patch(
+            "swarms.structs.agent.get_mcp_tools_sync"
+        ) as mock_get_tools:
+            # Mock MCP tools response
+            mock_tools = [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "mcp_calculator",
+                        "description": "Perform calculations",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "expression": {
+                                    "type": "string",
+                                    "description": "Math expression",
+                                }
+                            },
+                            "required": ["expression"],
+                        },
+                    },
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "mcp_weather",
+                        "description": "Get weather information",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "location": {
+                                    "type": "string",
+                                    "description": "City name",
+                                }
+                            },
+                            "required": ["location"],
+                        },
+                    },
+                },
+            ]
+            mock_get_tools.return_value = mock_tools
+
+            agent = Agent(
+                agent_name="MCP-Single-URL-Test-Agent",
+                model_name="gpt-4o-mini",
+                max_loops=1,
+                mcp_url=mock_mcp_url,
+                verbose=True,
+            )
+
+            # Test MCP tools integration
+            tools = agent.add_mcp_tools_to_memory()
+            assert len(tools) == 2, "MCP tools not loaded correctly"
+            assert (
+                mock_get_tools.called
+            ), "MCP tools function not called"
+
+            # Verify tool structure
+            assert "mcp_calculator" in str(
+                tools
+            ), "Calculator tool not found"
+            assert "mcp_weather" in str(
+                tools
+            ), "Weather tool not found"
+
+        print("✓ MCP single URL tools test passed")
+
+    def test_mcp_multiple_urls_tools(self):
+        """Test MCP multiple URLs tools"""
+        print("\nTesting MCP multiple URLs tools...")
+
+        # Mock multiple MCP URLs for testing
+        mock_mcp_urls = [
+            "http://localhost:8000/mcp1",
+            "http://localhost:8000/mcp2",
+            "http://localhost:8000/mcp3",
+        ]
+
+        with patch(
+            "swarms.structs.agent.get_tools_for_multiple_mcp_servers"
+        ) as mock_get_tools:
+            # Mock MCP tools response from multiple servers
+            mock_tools = [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "server1_tool",
+                        "description": "Tool from server 1",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "input": {"type": "string"}
+                            },
+                        },
+                    },
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "server2_tool",
+                        "description": "Tool from server 2",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "data": {"type": "string"}
+                            },
+                        },
+                    },
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "server3_tool",
+                        "description": "Tool from server 3",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {"type": "string"}
+                            },
+                        },
+                    },
+                },
+            ]
+            mock_get_tools.return_value = mock_tools
+
+            agent = Agent(
+                agent_name="MCP-Multiple-URLs-Test-Agent",
+                model_name="gpt-4o-mini",
+                max_loops=1,
+                mcp_urls=mock_mcp_urls,
+                verbose=True,
+            )
+
+            # Test MCP tools integration from multiple servers
+            tools = agent.add_mcp_tools_to_memory()
+            assert (
+                len(tools) == 3
+            ), "MCP tools from multiple servers not loaded correctly"
+            assert (
+                mock_get_tools.called
+            ), "MCP multiple tools function not called"
+
+            # Verify tools from different servers
+            tools_str = str(tools)
+            assert (
+                "server1_tool" in tools_str
+            ), "Server 1 tool not found"
+            assert (
+                "server2_tool" in tools_str
+            ), "Server 2 tool not found"
+            assert (
+                "server3_tool" in tools_str
+            ), "Server 3 tool not found"
+
+        print("✓ MCP multiple URLs tools test passed")
+
+    def test_base_tool_class_tools(self):
+        """Test BaseTool class tools"""
+        print("\nTesting BaseTool class tools...")
+
+        from swarms.tools.base_tool import BaseTool
+
+        def sample_function(x: int, y: int) -> int:
+            """Sample function for testing"""
+            return x + y
+
+        # Create BaseTool instance
+        base_tool = BaseTool(
+            verbose=True,
+            tools=[sample_function],
+            tool_system_prompt="You are a helpful tool assistant",
+        )
+
+        # Test tool schema generation
+        schema = base_tool.func_to_dict(sample_function)
+        assert isinstance(
+            schema, dict
+        ), "Tool schema not generated correctly"
+        assert "name" in schema, "Tool name not in schema"
+        assert (
+            "description" in schema
+        ), "Tool description not in schema"
+        assert "parameters" in schema, "Tool parameters not in schema"
+
+        # Test tool execution
+        test_input = {"x": 5, "y": 3}
+        result = base_tool.execute_tool(test_input)
+        assert result is not None, "Tool execution failed"
+
+        print("✓ BaseTool class tools test passed")
+
+    def test_tool_execution_and_error_handling(self):
+        """Test tool execution and error handling"""
+        print("\nTesting tool execution and error handling...")
+
+        def valid_tool(x: int) -> int:
+            """Valid tool that works correctly"""
+            return x * 2
+
+        def error_tool(x: int) -> int:
+            """Tool that raises an error"""
+            raise ValueError("Test error")
+
+        def type_error_tool(x: str) -> str:
+            """Tool with type error"""
+            return x.upper()
+
+        agent = Agent(
+            agent_name="Tool-Execution-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[valid_tool, error_tool, type_error_tool],
+        )
+
+        # Test valid tool execution
+        response = agent.run("Use the valid tool with input 5")
+        assert response is not None, "Valid tool execution failed"
+
+        # Test error handling
+        try:
+            agent.run("Use the error tool")
+            # Should handle error gracefully
+        except Exception:
+            # Expected to handle errors gracefully
             pass
 
-        def on_response_generated(self, response):
+        print("✓ Tool execution and error handling test passed")
+
+    def test_tool_schema_generation(self):
+        """Test tool schema generation and validation"""
+        print("\nTesting tool schema generation...")
+
+        def complex_tool(
+            name: str,
+            age: int,
+            email: str = None,
+            is_active: bool = True,
+        ) -> dict:
+            """Complex tool with various parameter types"""
+            return {
+                "name": name,
+                "age": age,
+                "email": email,
+                "is_active": is_active,
+            }
+
+        agent = Agent(
+            agent_name="Tool-Schema-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[complex_tool],
+        )
+
+        # Test that tools are properly registered
+        assert len(agent.tools) == 1, "Tool not registered correctly"
+
+        # Test tool execution with complex parameters
+        response = agent.run(
+            "Use the complex tool with name 'John', age 30, email 'john@example.com'"
+        )
+        assert response is not None, "Complex tool execution failed"
+
+        print("✓ Tool schema generation test passed")
+
+    def test_aop_tools(self):
+        """Test AOP (Agent Operations) tools"""
+        print("\nTesting AOP tools...")
+
+        from swarms.structs.aop import AOP
+
+        # Create test agents
+        agent1 = Agent(
+            agent_name="AOP-Agent-1",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+        )
+
+        agent2 = Agent(
+            agent_name="AOP-Agent-2",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+        )
+
+        # Create AOP instance
+        aop = AOP(
+            server_name="test-aop-server",
+            verbose=True,
+        )
+
+        # Test adding agents as tools
+        tool_names = aop.add_agents_batch(
+            agents=[agent1, agent2],
+            tool_names=["math_agent", "text_agent"],
+            tool_descriptions=[
+                "Performs mathematical operations",
+                "Handles text processing",
+            ],
+        )
+
+        assert (
+            len(tool_names) == 2
+        ), "AOP agents not added as tools correctly"
+        assert (
+            "math_agent" in tool_names
+        ), "Math agent tool not created"
+        assert (
+            "text_agent" in tool_names
+        ), "Text agent tool not created"
+
+        # Test tool discovery
+        tools = aop.get_available_tools()
+        assert len(tools) >= 2, "AOP tools not discovered correctly"
+
+        print("✓ AOP tools test passed")
+
+    def test_tool_choice_and_execution_modes(self):
+        """Test different tool choice and execution modes"""
+        print("\nTesting tool choice and execution modes...")
+
+        def tool_a(x: int) -> int:
+            """Tool A"""
+            return x + 1
+
+        def tool_b(x: int) -> int:
+            """Tool B"""
+            return x * 2
+
+        # Test with auto tool choice
+        agent_auto = Agent(
+            agent_name="Auto-Tool-Choice-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[tool_a, tool_b],
+            tool_choice="auto",
+        )
+
+        response_auto = agent_auto.run(
+            "Calculate something using the available tools"
+        )
+        assert response_auto is not None, "Auto tool choice failed"
+
+        # Test with specific tool choice
+        agent_specific = Agent(
+            agent_name="Specific-Tool-Choice-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[tool_a, tool_b],
+            tool_choice="tool_a",
+        )
+
+        response_specific = agent_specific.run(
+            "Use tool_a with input 5"
+        )
+        assert (
+            response_specific is not None
+        ), "Specific tool choice failed"
+
+        # Test with tool execution enabled/disabled
+        agent_execute = Agent(
+            agent_name="Tool-Execute-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[tool_a, tool_b],
+            execute_tool=True,
+        )
+
+        response_execute = agent_execute.run("Execute a tool")
+        assert (
+            response_execute is not None
+        ), "Tool execution mode failed"
+
+        print("✓ Tool choice and execution modes test passed")
+
+    def test_tool_system_prompts(self):
+        """Test tool system prompts and custom tool prompts"""
+        print("\nTesting tool system prompts...")
+
+        def calculator_tool(expression: str) -> str:
+            """Calculate mathematical expressions"""
+            try:
+                result = eval(expression)
+                return str(result)
+            except Exception:
+                return "Invalid expression"
+
+        custom_tool_prompt = "You have access to a calculator tool. Use it for mathematical calculations."
+
+        agent = Agent(
+            agent_name="Tool-Prompt-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[calculator_tool],
+            tool_system_prompt=custom_tool_prompt,
+        )
+
+        # Test that custom tool prompt is set
+        assert (
+            agent.tool_system_prompt == custom_tool_prompt
+        ), "Custom tool prompt not set"
+
+        # Test tool execution with custom prompt
+        response = agent.run("Calculate 2 + 2 * 3")
+        assert (
+            response is not None
+        ), "Tool execution with custom prompt failed"
+
+        print("✓ Tool system prompts test passed")
+
+    def test_tool_parallel_execution(self):
+        """Test parallel tool execution capabilities"""
+        print("\nTesting parallel tool execution...")
+
+        def slow_tool(x: int) -> int:
+            """Slow tool that takes time"""
+            import time
+
+            time.sleep(0.1)  # Simulate slow operation
+            return x * 2
+
+        def fast_tool(x: int) -> int:
+            """Fast tool"""
+            return x + 1
+
+        agent = Agent(
+            agent_name="Parallel-Tool-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[slow_tool, fast_tool],
+        )
+
+        # Test parallel tool execution
+        start_time = time.time()
+        response = agent.run("Use both tools with input 5")
+        end_time = time.time()
+
+        assert response is not None, "Parallel tool execution failed"
+        # Should be faster than sequential execution
+        assert (
+            end_time - start_time
+        ) < 0.5, "Parallel execution took too long"
+
+        print("✓ Parallel tool execution test passed")
+
+    def test_tool_validation_and_type_checking(self):
+        """Test tool validation and type checking"""
+        print("\nTesting tool validation and type checking...")
+
+        def typed_tool(x: int, y: str, z: bool = False) -> dict:
+            """Tool with specific type hints"""
+            return {"x": x, "y": y, "z": z, "result": f"{x} {y} {z}"}
+
+        agent = Agent(
+            agent_name="Tool-Validation-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[typed_tool],
+        )
+
+        # Test tool execution with correct types
+        response = agent.run(
+            "Use typed_tool with x=5, y='hello', z=True"
+        )
+        assert response is not None, "Typed tool execution failed"
+
+        # Test tool execution with incorrect types (should handle gracefully)
+        try:
+            agent.run("Use typed_tool with incorrect types")
+        except Exception:
+            # Expected to handle type errors gracefully
             pass
 
-    custom_event_listener = CustomEventListener()
-    flow_instance.add_event_listener(custom_event_listener)
-
-    # Ensure that the custom event listener methods are called during a conversation
-    with mock.patch.object(
-        custom_event_listener, "on_message_received"
-    ) as mock_received, mock.patch.object(
-        custom_event_listener, "on_response_generated"
-    ) as mock_response:
-        flow_instance.run("Message 1")
-        mock_received.assert_called_once()
-        mock_response.assert_called_once()
-
+        print("✓ Tool validation and type checking test passed")
 
-def test_flow_multiple_event_listeners(flow_instance):
-    # Test multiple event listeners
-    class FirstEventListener:
-        def on_message_received(self, message):
-            pass
+    def test_tool_caching_and_performance(self):
+        """Test tool caching and performance optimization"""
+        print("\nTesting tool caching and performance...")
 
-        def on_response_generated(self, response):
-            pass
+        call_count = 0
 
-    class SecondEventListener:
-        def on_message_received(self, message):
-            pass
+        def cached_tool(x: int) -> int:
+            """Tool that should be cached"""
+            nonlocal call_count
+            call_count += 1
+            return x**2
 
-        def on_response_generated(self, response):
-            pass
+        agent = Agent(
+            agent_name="Tool-Caching-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[cached_tool],
+        )
 
-    first_event_listener = FirstEventListener()
-    second_event_listener = SecondEventListener()
-    flow_instance.add_event_listener(first_event_listener)
-    flow_instance.add_event_listener(second_event_listener)
-
-    # Ensure that both event listeners receive events during a conversation
-    with mock.patch.object(
-        first_event_listener, "on_message_received"
-    ) as mock_first_received, mock.patch.object(
-        first_event_listener, "on_response_generated"
-    ) as mock_first_response, mock.patch.object(
-        second_event_listener, "on_message_received"
-    ) as mock_second_received, mock.patch.object(
-        second_event_listener, "on_response_generated"
-    ) as mock_second_response:
-        flow_instance.run("Message 1")
-        mock_first_received.assert_called_once()
-        mock_first_response.assert_called_once()
-        mock_second_received.assert_called_once()
-        mock_second_response.assert_called_once()
-
-
-# Add more test cases as needed to cover various aspects of your Agent class
-def test_flow_error_handling(flow_instance):
-    # Test error handling and exceptions
-    with pytest.raises(ValueError):
-        flow_instance.set_message_delimiter(
-            ""
-        )  # Empty delimiter, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.set_message_delimiter(
-            None
-        )  # None delimiter, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.set_logger(
-            "invalid_logger"
-        )  # Invalid logger type, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.add_context(
-            None, "value"
-        )  # None key, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.add_context(
-            "key", None
-        )  # None value, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.update_context(
-            None, "value"
-        )  # None key, should raise ValueError
-
-    with pytest.raises(ValueError):
-        flow_instance.update_context(
-            "key", None
-        )  # None value, should raise ValueError
-
-
-def test_flow_context_operations(flow_instance):
-    # Test context operations
-    flow_instance.add_context("user_id", "12345")
-    assert flow_instance.get_context("user_id") == "12345"
-    flow_instance.update_context("user_id", "54321")
-    assert flow_instance.get_context("user_id") == "54321"
-    flow_instance.remove_context("user_id")
-    assert flow_instance.get_context("user_id") is None
-
-
-# Add more test cases as needed to cover various aspects of your Agent class
-
-
-def test_flow_long_messages(flow_instance):
-    # Test handling of long messages
-    long_message = "A" * 10000  # Create a very long message
-    flow_instance.run(long_message)
-    assert len(flow_instance.get_message_history()) == 1
-    assert flow_instance.get_message_history()[0] == long_message
-
-
-def test_flow_custom_response(flow_instance):
-    # Test custom response generation
-    def custom_response_generator(message):
-        if message == "Hello":
-            return "Hi there!"
-        elif message == "How are you?":
-            return "I'm doing well, thank you."
-        else:
-            return "I don't understand."
-
-    flow_instance.set_response_generator(custom_response_generator)
-
-    assert flow_instance.run("Hello") == "Hi there!"
-    assert (
-        flow_instance.run("How are you?")
-        == "I'm doing well, thank you."
-    )
-    assert (
-        flow_instance.run("What's your name?")
-        == "I don't understand."
-    )
-
-
-def test_flow_message_validation(flow_instance):
-    # Test message validation
-    def custom_message_validator(message):
-        return len(message) > 0  # Reject empty messages
-
-    flow_instance.set_message_validator(custom_message_validator)
-
-    assert flow_instance.run("Valid message") is not None
-    assert (
-        flow_instance.run("") is None
-    )  # Empty message should be rejected
-    assert (
-        flow_instance.run(None) is None
-    )  # None message should be rejected
-
-
-def test_flow_custom_logging(flow_instance):
-    custom_logger = logger
-    flow_instance.set_logger(custom_logger)
-
-    with mock.patch.object(custom_logger, "log") as mock_log:
-        flow_instance.run("Message")
-        mock_log.assert_called_once_with("Message")
-
-
-def test_flow_performance(flow_instance):
-    # Test the performance of the Agent class by running a large number of messages
-    num_messages = 1000
-    for i in range(num_messages):
-        flow_instance.run(f"Message {i}")
-    assert len(flow_instance.get_message_history()) == num_messages
-
-
-def test_flow_complex_use_case(flow_instance):
-    # Test a complex use case scenario
-    flow_instance.add_context("user_id", "12345")
-    flow_instance.run("Hello")
-    flow_instance.run("How can I help you?")
-    assert (
-        flow_instance.get_response() == "Please provide more details."
-    )
-    flow_instance.update_context("user_id", "54321")
-    flow_instance.run("I need help with my order")
-    assert (
-        flow_instance.get_response()
-        == "Sure, I can assist with that."
-    )
-    flow_instance.reset_conversation()
-    assert len(flow_instance.get_message_history()) == 0
-    assert flow_instance.get_context("user_id") is None
+        # Test multiple calls to the same tool
+        agent.run("Use cached_tool with input 5")
+        agent.run("Use cached_tool with input 5 again")
 
+        # Verify tool was called (caching behavior may vary)
+        assert call_count >= 1, "Tool not called at least once"
+
+        print("✓ Tool caching and performance test passed")
 
-# Add more test cases as needed to cover various aspects of your Agent class
-def test_flow_context_handling(flow_instance):
-    # Test context handling
-    flow_instance.add_context("user_id", "12345")
-    assert flow_instance.get_context("user_id") == "12345"
-    flow_instance.update_context("user_id", "54321")
-    assert flow_instance.get_context("user_id") == "54321"
-    flow_instance.remove_context("user_id")
-    assert flow_instance.get_context("user_id") is None
+    def test_tool_error_recovery(self):
+        """Test tool error recovery and fallback mechanisms"""
+        print("\nTesting tool error recovery...")
 
+        def unreliable_tool(x: int) -> int:
+            """Tool that sometimes fails"""
+            import random
 
-def test_flow_concurrent_requests(flow_instance):
-    # Test concurrent message processing
-    import threading
+            if random.random() < 0.5:
+                raise Exception("Random failure")
+            return x * 2
 
-    def send_messages():
-        for i in range(100):
-            flow_instance.run(f"Message {i}")
+        def fallback_tool(x: int) -> int:
+            """Fallback tool"""
+            return x + 10
 
-    threads = []
-    for _ in range(5):
-        thread = threading.Thread(target=send_messages)
-        threads.append(thread)
-        thread.start()
-
-    for thread in threads:
-        thread.join()
-
-    assert len(flow_instance.get_message_history()) == 500
-
-
-def test_flow_custom_timeout(flow_instance):
-    # Test custom timeout handling
-    flow_instance.set_timeout(
-        10
-    )  # Set a custom timeout of 10 seconds
-    assert flow_instance.get_timeout() == 10
-
-    import time
-
-    start_time = time.time()
-    flow_instance.run("Long-running operation")
-    end_time = time.time()
-    execution_time = end_time - start_time
-    assert execution_time >= 10  # Ensure the timeout was respected
-
-
-# Add more test cases as needed to thoroughly cover your Agent class
-
-
-def test_flow_interactive_run(flow_instance, capsys):
-    # Test interactive run mode
-    # Simulate user input and check if the AI responds correctly
-    user_input = ["Hello", "How can you help me?", "Exit"]
-
-    def simulate_user_input(input_list):
-        input_index = 0
-        while input_index < len(input_list):
-            user_response = input_list[input_index]
-            flow_instance.interactive_run(max_loops=1)
-
-            # Capture the AI's response
-            captured = capsys.readouterr()
-            ai_response = captured.out.strip()
-
-            assert f"You: {user_response}" in captured.out
-            assert "AI:" in captured.out
-
-            # Check if the AI's response matches the expected response
-            expected_response = f"AI: {ai_response}"
-            assert expected_response in captured.out
-
-            input_index += 1
-
-    simulate_user_input(user_input)
-
-
-# Assuming you have already defined your Agent class and created an instance for testing
-
-
-def test_flow_agent_history_prompt(flow_instance):
-    # Test agent history prompt generation
-    system_prompt = "This is the system prompt."
-    history = ["User: Hi", "AI: Hello"]
-
-    agent_history_prompt = flow_instance.agent_history_prompt(
-        system_prompt, history
-    )
-
-    assert (
-        "SYSTEM_PROMPT: This is the system prompt."
-        in agent_history_prompt
-    )
-    assert (
-        "History: ['User: Hi', 'AI: Hello']" in agent_history_prompt
-    )
-
-
-async def test_flow_run_concurrent(flow_instance):
-    # Test running tasks concurrently
-    tasks = ["Task 1", "Task 2", "Task 3"]
-    completed_tasks = await flow_instance.run_concurrent(tasks)
-
-    # Ensure that all tasks are completed
-    assert len(completed_tasks) == len(tasks)
-
-
-def test_flow_bulk_run(flow_instance):
-    # Test bulk running of tasks
-    input_data = [
-        {"task": "Task 1", "param1": "value1"},
-        {"task": "Task 2", "param2": "value2"},
-        {"task": "Task 3", "param3": "value3"},
+        agent = Agent(
+            agent_name="Tool-Recovery-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[unreliable_tool, fallback_tool],
+            retry_attempts=3,
+        )
+
+        # Test error recovery
+        response = agent.run("Use unreliable_tool with input 5")
+        assert response is not None, "Tool error recovery failed"
+
+        print("✓ Tool error recovery test passed")
+
+    def test_tool_with_different_output_types(self):
+        """Test tools with different output types"""
+        print("\nTesting tools with different output types...")
+
+        def json_tool(data: dict) -> str:
+            """Tool that returns JSON string"""
+            import json
+
+            return json.dumps(data)
+
+        def yaml_tool(data: dict) -> str:
+            """Tool that returns YAML string"""
+            import yaml
+
+            return yaml.dump(data)
+
+        def dict_tool(x: int) -> dict:
+            """Tool that returns dictionary"""
+            return {"value": x, "squared": x**2}
+
+        agent = Agent(
+            agent_name="Output-Types-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[json_tool, yaml_tool, dict_tool],
+        )
+
+        # Test JSON tool
+        response = agent.run(
+            "Use json_tool with data {'name': 'test', 'value': 123}"
+        )
+        assert response is not None, "JSON tool execution failed"
+
+        # Test YAML tool
+        response = agent.run(
+            "Use yaml_tool with data {'key': 'value'}"
+        )
+        assert response is not None, "YAML tool execution failed"
+
+        # Test dict tool
+        response = agent.run("Use dict_tool with input 5")
+        assert response is not None, "Dict tool execution failed"
+
+        print("✓ Tools with different output types test passed")
+
+    def test_tool_with_async_execution(self):
+        """Test tools with async execution"""
+        print("\nTesting tools with async execution...")
+
+        async def async_tool(x: int) -> int:
+            """Async tool that performs async operation"""
+            import asyncio
+
+            await asyncio.sleep(0.01)  # Simulate async operation
+            return x * 2
+
+        def sync_tool(x: int) -> int:
+            """Sync tool"""
+            return x + 1
+
+        agent = Agent(
+            agent_name="Async-Tool-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[
+                sync_tool
+            ],  # Note: async tools need special handling
+        )
+
+        # Test sync tool execution
+        response = agent.run("Use sync_tool with input 5")
+        assert response is not None, "Sync tool execution failed"
+
+        print("✓ Tools with async execution test passed")
+
+    def test_tool_with_file_operations(self):
+        """Test tools that perform file operations"""
+        print("\nTesting tools with file operations...")
+
+        import os
+        import tempfile
+
+        def file_writer_tool(filename: str, content: str) -> str:
+            """Tool that writes content to a file"""
+            with open(filename, "w") as f:
+                f.write(content)
+            return f"Written {len(content)} characters to {filename}"
+
+        def file_reader_tool(filename: str) -> str:
+            """Tool that reads content from a file"""
+            try:
+                with open(filename, "r") as f:
+                    return f.read()
+            except FileNotFoundError:
+                return "File not found"
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            test_file = os.path.join(temp_dir, "test.txt")
+
+            agent = Agent(
+                agent_name="File-Ops-Test-Agent",
+                model_name="gpt-4o-mini",
+                max_loops=1,
+                tools=[file_writer_tool, file_reader_tool],
+            )
+
+            # Test file writing
+            response = agent.run(
+                f"Use file_writer_tool to write 'Hello World' to {test_file}"
+            )
+            assert (
+                response is not None
+            ), "File writing tool execution failed"
+
+            # Test file reading
+            response = agent.run(
+                f"Use file_reader_tool to read from {test_file}"
+            )
+            assert (
+                response is not None
+            ), "File reading tool execution failed"
+
+        print("✓ Tools with file operations test passed")
+
+    def test_tool_with_network_operations(self):
+        """Test tools that perform network operations"""
+        print("\nTesting tools with network operations...")
+
+        def url_tool(url: str) -> str:
+            """Tool that processes URLs"""
+            return f"Processing URL: {url}"
+
+        def api_tool(endpoint: str, method: str = "GET") -> str:
+            """Tool that simulates API calls"""
+            return f"API {method} request to {endpoint}"
+
+        agent = Agent(
+            agent_name="Network-Ops-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[url_tool, api_tool],
+        )
+
+        # Test URL tool
+        response = agent.run(
+            "Use url_tool with 'https://example.com'"
+        )
+        assert response is not None, "URL tool execution failed"
+
+        # Test API tool
+        response = agent.run(
+            "Use api_tool with endpoint '/api/data' and method 'POST'"
+        )
+        assert response is not None, "API tool execution failed"
+
+        print("✓ Tools with network operations test passed")
+
+    def test_tool_with_database_operations(self):
+        """Test tools that perform database operations"""
+        print("\nTesting tools with database operations...")
+
+        def db_query_tool(query: str) -> str:
+            """Tool that simulates database queries"""
+            return f"Executed query: {query}"
+
+        def db_insert_tool(table: str, data: dict) -> str:
+            """Tool that simulates database inserts"""
+            return f"Inserted data into {table}: {data}"
+
+        agent = Agent(
+            agent_name="Database-Ops-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[db_query_tool, db_insert_tool],
+        )
+
+        # Test database query
+        response = agent.run(
+            "Use db_query_tool with 'SELECT * FROM users'"
+        )
+        assert (
+            response is not None
+        ), "Database query tool execution failed"
+
+        # Test database insert
+        response = agent.run(
+            "Use db_insert_tool with table 'users' and data {'name': 'John'}"
+        )
+        assert (
+            response is not None
+        ), "Database insert tool execution failed"
+
+        print("✓ Tools with database operations test passed")
+
+    def test_tool_with_machine_learning_operations(self):
+        """Test tools that perform ML operations"""
+        print("\nTesting tools with ML operations...")
+
+        def predict_tool(features: list) -> str:
+            """Tool that simulates ML predictions"""
+            return f"Prediction for features {features}: 0.85"
+
+        def train_tool(model_name: str, data_size: int) -> str:
+            """Tool that simulates model training"""
+            return f"Trained {model_name} with {data_size} samples"
+
+        agent = Agent(
+            agent_name="ML-Ops-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[predict_tool, train_tool],
+        )
+
+        # Test ML prediction
+        response = agent.run(
+            "Use predict_tool with features [1, 2, 3, 4]"
+        )
+        assert (
+            response is not None
+        ), "ML prediction tool execution failed"
+
+        # Test ML training
+        response = agent.run(
+            "Use train_tool with model 'random_forest' and data_size 1000"
+        )
+        assert (
+            response is not None
+        ), "ML training tool execution failed"
+
+        print("✓ Tools with ML operations test passed")
+
+    def test_tool_with_image_processing(self):
+        """Test tools that perform image processing"""
+        print("\nTesting tools with image processing...")
+
+        def resize_tool(
+            image_path: str, width: int, height: int
+        ) -> str:
+            """Tool that simulates image resizing"""
+            return f"Resized {image_path} to {width}x{height}"
+
+        def filter_tool(image_path: str, filter_type: str) -> str:
+            """Tool that simulates image filtering"""
+            return f"Applied {filter_type} filter to {image_path}"
+
+        agent = Agent(
+            agent_name="Image-Processing-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[resize_tool, filter_tool],
+        )
+
+        # Test image resizing
+        response = agent.run(
+            "Use resize_tool with image 'test.jpg', width 800, height 600"
+        )
+        assert (
+            response is not None
+        ), "Image resize tool execution failed"
+
+        # Test image filtering
+        response = agent.run(
+            "Use filter_tool with image 'test.jpg' and filter 'blur'"
+        )
+        assert (
+            response is not None
+        ), "Image filter tool execution failed"
+
+        print("✓ Tools with image processing test passed")
+
+    def test_tool_with_text_processing(self):
+        """Test tools that perform text processing"""
+        print("\nTesting tools with text processing...")
+
+        def tokenize_tool(text: str) -> list:
+            """Tool that tokenizes text"""
+            return text.split()
+
+        def translate_tool(text: str, target_lang: str) -> str:
+            """Tool that simulates translation"""
+            return f"Translated '{text}' to {target_lang}"
+
+        def sentiment_tool(text: str) -> str:
+            """Tool that simulates sentiment analysis"""
+            return f"Sentiment of '{text}': positive"
+
+        agent = Agent(
+            agent_name="Text-Processing-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[tokenize_tool, translate_tool, sentiment_tool],
+        )
+
+        # Test text tokenization
+        response = agent.run(
+            "Use tokenize_tool with 'Hello world this is a test'"
+        )
+        assert (
+            response is not None
+        ), "Text tokenization tool execution failed"
+
+        # Test translation
+        response = agent.run(
+            "Use translate_tool with 'Hello' and target_lang 'Spanish'"
+        )
+        assert (
+            response is not None
+        ), "Translation tool execution failed"
+
+        # Test sentiment analysis
+        response = agent.run(
+            "Use sentiment_tool with 'I love this product!'"
+        )
+        assert (
+            response is not None
+        ), "Sentiment analysis tool execution failed"
+
+        print("✓ Tools with text processing test passed")
+
+    def test_tool_with_mathematical_operations(self):
+        """Test tools that perform mathematical operations"""
+        print("\nTesting tools with mathematical operations...")
+
+        def matrix_multiply_tool(
+            matrix_a: list, matrix_b: list
+        ) -> list:
+            """Tool that multiplies matrices"""
+            # Simple 2x2 matrix multiplication
+            result = [[0, 0], [0, 0]]
+            for i in range(2):
+                for j in range(2):
+                    for k in range(2):
+                        result[i][j] += (
+                            matrix_a[i][k] * matrix_b[k][j]
+                        )
+            return result
+
+        def statistics_tool(data: list) -> dict:
+            """Tool that calculates statistics"""
+            return {
+                "mean": sum(data) / len(data),
+                "max": max(data),
+                "min": min(data),
+                "count": len(data),
+            }
+
+        def calculus_tool(function: str, x: float) -> str:
+            """Tool that simulates calculus operations"""
+            return f"Derivative of {function} at x={x}: 2*x"
+
+        agent = Agent(
+            agent_name="Math-Ops-Test-Agent",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            tools=[
+                matrix_multiply_tool,
+                statistics_tool,
+                calculus_tool,
+            ],
+        )
+
+        # Test matrix multiplication
+        response = agent.run(
+            "Use matrix_multiply_tool with [[1,2],[3,4]] and [[5,6],[7,8]]"
+        )
+        assert (
+            response is not None
+        ), "Matrix multiplication tool execution failed"
+
+        # Test statistics
+        response = agent.run(
+            "Use statistics_tool with [1, 2, 3, 4, 5]"
+        )
+        assert (
+            response is not None
+        ), "Statistics tool execution failed"
+
+        # Test calculus
+        response = agent.run("Use calculus_tool with 'x^2' and x=3")
+        assert response is not None, "Calculus tool execution failed"
+
+        print("✓ Tools with mathematical operations test passed")
+
+
+# ============================================================================
+# LLM ARGS AND HANDLING TESTS
+# ============================================================================
+
+
+class TestLLMArgsAndHandling:
+    """Test LLM arguments and handling functionality"""
+
+    def test_combined_llm_args(self):
+        """Test that llm_args, tools_list_dictionary, and MCP tools can be combined."""
+        print("\nTesting combined LLM args...")
+
+        # Mock tools list dictionary
+        tools_list = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "test_function",
+                    "description": "A test function",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "test_param": {
+                                "type": "string",
+                                "description": "A test parameter",
+                            }
+                        },
+                    },
+                },
+            }
+        ]
+
+        # Mock llm_args with Azure OpenAI specific parameters
+        llm_args = {
+            "api_version": "2024-02-15-preview",
+            "base_url": "https://your-resource.openai.azure.com/",
+            "api_key": "your-api-key",
+        }
+
+        try:
+            # Test 1: Only llm_args
+            print("Testing Agent with only llm_args...")
+            Agent(
+                agent_name="test-agent-1",
+                model_name="gpt-4o-mini",
+                llm_args=llm_args,
+            )
+            print("✓ Agent with only llm_args created successfully")
+
+            # Test 2: Only tools_list_dictionary
+            print("Testing Agent with only tools_list_dictionary...")
+            Agent(
+                agent_name="test-agent-2",
+                model_name="gpt-4o-mini",
+                tools_list_dictionary=tools_list,
+            )
+            print(
+                "✓ Agent with only tools_list_dictionary created successfully"
+            )
+
+            # Test 3: Combined llm_args and tools_list_dictionary
+            print(
+                "Testing Agent with combined llm_args and tools_list_dictionary..."
+            )
+            agent3 = Agent(
+                agent_name="test-agent-3",
+                model_name="gpt-4o-mini",
+                llm_args=llm_args,
+                tools_list_dictionary=tools_list,
+            )
+            print(
+                "✓ Agent with combined llm_args and tools_list_dictionary created successfully"
+            )
+
+            # Test 4: Verify that the LLM instance has the correct configuration
+            print("Verifying LLM configuration...")
+
+            # Check that agent3 has both llm_args and tools configured
+            assert (
+                agent3.llm_args == llm_args
+            ), "llm_args not preserved"
+            assert (
+                agent3.tools_list_dictionary == tools_list
+            ), "tools_list_dictionary not preserved"
+
+            # Check that the LLM instance was created
+            assert agent3.llm is not None, "LLM instance not created"
+
+            print("✓ LLM configuration verified successfully")
+            print("✓ Combined LLM args test passed")
+
+        except Exception as e:
+            print(f"✗ Combined LLM args test failed: {e}")
+            raise
+
+    def test_azure_openai_example(self):
+        """Test the Azure OpenAI example with api_version parameter."""
+        print("\nTesting Azure OpenAI example with api_version...")
+
+        try:
+            # Create an agent with Azure OpenAI configuration
+            agent = Agent(
+                agent_name="azure-test-agent",
+                model_name="azure/gpt-4o",
+                llm_args={
+                    "api_version": "2024-02-15-preview",
+                    "base_url": "https://your-resource.openai.azure.com/",
+                    "api_key": "your-api-key",
+                },
+                tools_list_dictionary=[
+                    {
+                        "type": "function",
+                        "function": {
+                            "name": "get_weather",
+                            "description": "Get weather information",
+                            "parameters": {
+                                "type": "object",
+                                "properties": {
+                                    "location": {
+                                        "type": "string",
+                                        "description": "The city and state",
+                                    }
+                                },
+                            },
+                        },
+                    }
+                ],
+            )
+
+            print(
+                "✓ Azure OpenAI agent with combined parameters created successfully"
+            )
+
+            # Verify configuration
+            assert agent.llm_args is not None, "llm_args not set"
+            assert (
+                "api_version" in agent.llm_args
+            ), "api_version not in llm_args"
+            assert (
+                agent.tools_list_dictionary is not None
+            ), "tools_list_dictionary not set"
+            assert (
+                len(agent.tools_list_dictionary) > 0
+            ), "tools_list_dictionary is empty"
+
+            print("✓ Azure OpenAI configuration verified")
+            print("✓ Azure OpenAI example test passed")
+
+        except Exception as e:
+            print(f"✗ Azure OpenAI test failed: {e}")
+            raise
+
+    def test_llm_handling_args_kwargs(self):
+        """Test that llm_handling properly handles both args and kwargs."""
+        print("\nTesting LLM handling args and kwargs...")
+
+        # Create an agent instance
+        agent = Agent(
+            agent_name="test-agent",
+            model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_tokens=1000,
+        )
+
+        # Test 1: Call llm_handling with kwargs
+        print("Test 1: Testing kwargs handling...")
+        try:
+            # This should work and add the kwargs to additional_args
+            agent.llm_handling(top_p=0.9, frequency_penalty=0.1)
+            print("✓ kwargs handling works")
+        except Exception as e:
+            print(f"✗ kwargs handling failed: {e}")
+            raise
+
+        # Test 2: Call llm_handling with args (dictionary)
+        print("Test 2: Testing args handling with dictionary...")
+        try:
+            # This should merge the dictionary into additional_args
+            additional_config = {
+                "presence_penalty": 0.2,
+                "logit_bias": {"123": 1},
+            }
+            agent.llm_handling(additional_config)
+            print("✓ args handling with dictionary works")
+        except Exception as e:
+            print(f"✗ args handling with dictionary failed: {e}")
+            raise
+
+        # Test 3: Call llm_handling with both args and kwargs
+        print("Test 3: Testing both args and kwargs...")
+        try:
+            # This should handle both
+            additional_config = {"presence_penalty": 0.3}
+            agent.llm_handling(
+                additional_config, top_p=0.8, frequency_penalty=0.2
+            )
+            print("✓ combined args and kwargs handling works")
+        except Exception as e:
+            print(f"✗ combined args and kwargs handling failed: {e}")
+            raise
+
+        # Test 4: Call llm_handling with non-dictionary args
+        print("Test 4: Testing non-dictionary args...")
+        try:
+            # This should store args under 'additional_args' key
+            agent.llm_handling(
+                "some_string", 123, ["list", "of", "items"]
+            )
+            print("✓ non-dictionary args handling works")
+        except Exception as e:
+            print(f"✗ non-dictionary args handling failed: {e}")
+            raise
+
+        print("✓ LLM handling args and kwargs test passed")
+
+
+# ============================================================================
+# MAIN TEST RUNNER
+# ============================================================================
+
+
+def run_all_tests():
+    """Run all test functions"""
+    print("Starting Merged Agent Test Suite...\n")
+
+    # Test classes to run
+    test_classes = [
+        TestBasicAgent,
+        TestAgentFeatures,
+        TestAgentLogging,
+        TestCreateAgentsFromYaml,
+        TestAgentBenchmark,
+        TestAgentToolUsage,
+        TestLLMArgsAndHandling,
     ]
-    responses = flow_instance.bulk_run(input_data)
-
-    # Ensure that the responses match the input tasks
-    assert responses[0] == "Response for Task 1"
-    assert responses[1] == "Response for Task 2"
-    assert responses[2] == "Response for Task 3"
 
-
-def test_flow_from_llm_and_template():
-    # Test creating Agent instance from an LLM and a template
-    llm_instance = mocked_llm  # Replace with your LLM class
-    template = "This is a template for testing."
-
-    flow_instance = Agent.from_llm_and_template(
-        llm_instance, template
-    )
-
-    assert isinstance(flow_instance, Agent)
-
-
-def test_flow_from_llm_and_template_file():
-    # Test creating Agent instance from an LLM and a template file
-    llm_instance = mocked_llm  # Replace with your LLM class
-    template_file = (
-        "template.txt"  # Create a template file for testing
-    )
-
-    flow_instance = Agent.from_llm_and_template_file(
-        llm_instance, template_file
-    )
-
-    assert isinstance(flow_instance, Agent)
-
-
-def test_flow_save_and_load(flow_instance, tmp_path):
-    # Test saving and loading the agent state
-    file_path = tmp_path / "flow_state.json"
-
-    # Save the state
-    flow_instance.save(file_path)
-
-    # Create a new instance and load the state
-    new_flow_instance = Agent(llm=mocked_llm, max_loops=5)
-    new_flow_instance.load(file_path)
-
-    # Ensure that the loaded state matches the original state
-    assert new_flow_instance.memory == flow_instance.memory
-
-
-def test_flow_validate_response(flow_instance):
-    # Test response validation
-    valid_response = "This is a valid response."
-    invalid_response = "Short."
-
-    assert flow_instance.validate_response(valid_response) is True
-    assert flow_instance.validate_response(invalid_response) is False
-
-
-# Add more test cases as needed for other methods and features of your Agent class
-
-# Finally, don't forget to run your tests using a testing framework like pytest
-
-# Assuming you have already defined your Agent class and created an instance for testing
-
-
-def test_flow_print_history_and_memory(capsys, flow_instance):
-    # Test printing the history and memory of the agent
-    history = ["User: Hi", "AI: Hello"]
-    flow_instance.memory = [history]
-
-    flow_instance.print_history_and_memory()
-
-    captured = capsys.readouterr()
-    assert "Agent History and Memory" in captured.out
-    assert "Loop 1:" in captured.out
-    assert "User: Hi" in captured.out
-    assert "AI: Hello" in captured.out
-
-
-def test_flow_run_with_timeout(flow_instance):
-    # Test running with a timeout
-    task = "Task with a long response time"
-    response = flow_instance.run_with_timeout(task, timeout=1)
-
-    # Ensure that the response is either the actual response or "Timeout"
-    assert response in ["Actual Response", "Timeout"]
+    total_tests = 0
+    passed_tests = 0
+    failed_tests = 0
+
+    for test_class in test_classes:
+        print(f"\n{'='*50}")
+        print(f"Running {test_class.__name__}")
+        print(f"{'='*50}")
+
+        # Create test instance
+        test_instance = test_class()
+
+        # Get all test methods
+        test_methods = [
+            method
+            for method in dir(test_instance)
+            if method.startswith("test_")
+        ]
+
+        for test_method in test_methods:
+            total_tests += 1
+            try:
+                # Run the test method
+                getattr(test_instance, test_method)()
+                passed_tests += 1
+                print(f"✓ {test_method}")
+            except Exception as e:
+                failed_tests += 1
+                print(f"✗ {test_method}: {str(e)}")
+
+    # Print summary
+    print(f"\n{'='*50}")
+    print("Test Summary")
+    print(f"{'='*50}")
+    print(f"Total Tests: {total_tests}")
+    print(f"Passed: {passed_tests}")
+    print(f"Failed: {failed_tests}")
+    print(f"Success Rate: {(passed_tests/total_tests)*100:.2f}%")
+
+    return {
+        "total": total_tests,
+        "passed": passed_tests,
+        "failed": failed_tests,
+        "success_rate": (passed_tests / total_tests) * 100,
+    }
 
 
-# Add more test cases as needed for other methods and features of your Agent class
+if __name__ == "__main__":
+    # Run all tests
+    results = run_all_tests()
 
-# Finally, don't forget to run your tests using a testing framework like pytest
+    print(results)
diff --git a/tests/structs/test_agent_features.py b/tests/structs/test_agent_features.py
deleted file mode 100644
index 22b6c3ea..00000000
--- a/tests/structs/test_agent_features.py
+++ /dev/null
@@ -1,600 +0,0 @@
-import asyncio
-import json
-import os
-import tempfile
-import time
-
-import yaml
-from swarm_models import OpenAIChat
-
-from swarms import Agent
-
-
-def test_basic_agent_functionality():
-    """Test basic agent initialization and simple task execution"""
-    print("\nTesting basic agent functionality...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(agent_name="Test-Agent", llm=model, max_loops=1)
-
-    response = agent.run("What is 2+2?")
-    assert response is not None, "Agent response should not be None"
-
-    # Test agent properties
-    assert (
-        agent.agent_name == "Test-Agent"
-    ), "Agent name not set correctly"
-    assert agent.max_loops == 1, "Max loops not set correctly"
-    assert agent.llm is not None, "LLM not initialized"
-
-    print("✓ Basic agent functionality test passed")
-
-
-def test_memory_management():
-    """Test agent memory management functionality"""
-    print("\nTesting memory management...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Memory-Test-Agent",
-        llm=model,
-        max_loops=1,
-        context_length=8192,
-    )
-
-    # Test adding to memory
-    agent.add_memory("Test memory entry")
-    assert (
-        "Test memory entry"
-        in agent.short_memory.return_history_as_string()
-    )
-
-    # Test memory query
-    agent.memory_query("Test query")
-
-    # Test token counting
-    tokens = agent.check_available_tokens()
-    assert isinstance(tokens, int), "Token count should be an integer"
-
-    print("✓ Memory management test passed")
-
-
-def test_agent_output_formats():
-    """Test all available output formats"""
-    print("\nTesting all output formats...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    test_task = "Say hello!"
-
-    output_types = {
-        "str": str,
-        "string": str,
-        "list": str,  # JSON string containing list
-        "json": str,  # JSON string
-        "dict": dict,
-        "yaml": str,
-    }
-
-    for output_type, expected_type in output_types.items():
-        agent = Agent(
-            agent_name=f"{output_type.capitalize()}-Output-Agent",
-            llm=model,
-            max_loops=1,
-            output_type=output_type,
-        )
-
-        response = agent.run(test_task)
-        assert (
-            response is not None
-        ), f"{output_type} output should not be None"
-
-        if output_type == "yaml":
-            # Verify YAML can be parsed
-            try:
-                yaml.safe_load(response)
-                print(f"✓ {output_type} output valid")
-            except yaml.YAMLError:
-                assert False, f"Invalid YAML output for {output_type}"
-        elif output_type in ["json", "list"]:
-            # Verify JSON can be parsed
-            try:
-                json.loads(response)
-                print(f"✓ {output_type} output valid")
-            except json.JSONDecodeError:
-                assert False, f"Invalid JSON output for {output_type}"
-
-    print("✓ Output formats test passed")
-
-
-def test_agent_state_management():
-    """Test comprehensive state management functionality"""
-    print("\nTesting state management...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-
-    # Create temporary directory for test files
-    with tempfile.TemporaryDirectory() as temp_dir:
-        state_path = os.path.join(temp_dir, "agent_state.json")
-
-        # Create agent with initial state
-        agent1 = Agent(
-            agent_name="State-Test-Agent",
-            llm=model,
-            max_loops=1,
-            saved_state_path=state_path,
-        )
-
-        # Add some data to the agent
-        agent1.run("Remember this: Test message 1")
-        agent1.add_memory("Test message 2")
-
-        # Save state
-        agent1.save()
-        assert os.path.exists(state_path), "State file not created"
-
-        # Create new agent and load state
-        agent2 = Agent(
-            agent_name="State-Test-Agent", llm=model, max_loops=1
-        )
-        agent2.load(state_path)
-
-        # Verify state loaded correctly
-        history2 = agent2.short_memory.return_history_as_string()
-        assert (
-            "Test message 1" in history2
-        ), "State not loaded correctly"
-        assert (
-            "Test message 2" in history2
-        ), "Memory not loaded correctly"
-
-        # Test autosave functionality
-        agent3 = Agent(
-            agent_name="Autosave-Test-Agent",
-            llm=model,
-            max_loops=1,
-            saved_state_path=os.path.join(
-                temp_dir, "autosave_state.json"
-            ),
-            autosave=True,
-        )
-
-        agent3.run("Test autosave")
-        time.sleep(2)  # Wait for autosave
-        assert os.path.exists(
-            os.path.join(temp_dir, "autosave_state.json")
-        ), "Autosave file not created"
-
-    print("✓ State management test passed")
-
-
-def test_agent_tools_and_execution():
-    """Test agent tool handling and execution"""
-    print("\nTesting tools and execution...")
-
-    def sample_tool(x: int, y: int) -> int:
-        """Sample tool that adds two numbers"""
-        return x + y
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Tools-Test-Agent",
-        llm=model,
-        max_loops=1,
-        tools=[sample_tool],
-    )
-
-    # Test adding tools
-    agent.add_tool(lambda x: x * 2)
-    assert len(agent.tools) == 2, "Tool not added correctly"
-
-    # Test removing tools
-    agent.remove_tool(sample_tool)
-    assert len(agent.tools) == 1, "Tool not removed correctly"
-
-    # Test tool execution
-    response = agent.run("Calculate 2 + 2 using the sample tool")
-    assert response is not None, "Tool execution failed"
-
-    print("✓ Tools and execution test passed")
-
-
-def test_agent_concurrent_execution():
-    """Test agent concurrent execution capabilities"""
-    print("\nTesting concurrent execution...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Concurrent-Test-Agent", llm=model, max_loops=1
-    )
-
-    # Test bulk run
-    tasks = [
-        {"task": "Count to 3"},
-        {"task": "Say hello"},
-        {"task": "Tell a short joke"},
-    ]
-
-    responses = agent.bulk_run(tasks)
-    assert len(responses) == len(tasks), "Not all tasks completed"
-    assert all(
-        response is not None for response in responses
-    ), "Some tasks failed"
-
-    # Test concurrent tasks
-    concurrent_responses = agent.run_concurrent_tasks(
-        ["Task 1", "Task 2", "Task 3"]
-    )
-    assert (
-        len(concurrent_responses) == 3
-    ), "Not all concurrent tasks completed"
-
-    print("✓ Concurrent execution test passed")
-
-
-def test_agent_error_handling():
-    """Test agent error handling and recovery"""
-    print("\nTesting error handling...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Error-Test-Agent",
-        llm=model,
-        max_loops=1,
-        retry_attempts=3,
-        retry_interval=1,
-    )
-
-    # Test invalid tool execution
-    try:
-        agent.parse_and_execute_tools("invalid_json")
-        print("✓ Invalid tool execution handled")
-    except Exception:
-        assert True, "Expected error caught"
-
-    # Test recovery after error
-    response = agent.run("Continue after error")
-    assert response is not None, "Agent failed to recover after error"
-
-    print("✓ Error handling test passed")
-
-
-def test_agent_configuration():
-    """Test agent configuration and parameters"""
-    print("\nTesting agent configuration...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Config-Test-Agent",
-        llm=model,
-        max_loops=1,
-        temperature=0.7,
-        max_tokens=4000,
-        context_length=8192,
-    )
-
-    # Test configuration methods
-    agent.update_system_prompt("New system prompt")
-    agent.update_max_loops(2)
-    agent.update_loop_interval(2)
-
-    # Verify updates
-    assert agent.max_loops == 2, "Max loops not updated"
-    assert agent.loop_interval == 2, "Loop interval not updated"
-
-    # Test configuration export
-    config_dict = agent.to_dict()
-    assert isinstance(
-        config_dict, dict
-    ), "Configuration export failed"
-
-    # Test YAML export
-    yaml_config = agent.to_yaml()
-    assert isinstance(yaml_config, str), "YAML export failed"
-
-    print("✓ Configuration test passed")
-
-
-def test_agent_with_stopping_condition():
-    """Test agent with custom stopping condition"""
-    print("\nTesting agent with stopping condition...")
-
-    def custom_stopping_condition(response: str) -> bool:
-        return "STOP" in response.upper()
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Stopping-Condition-Agent",
-        llm=model,
-        max_loops=5,
-        stopping_condition=custom_stopping_condition,
-    )
-
-    response = agent.run("Count up until you see the word STOP")
-    assert response is not None, "Stopping condition test failed"
-    print("✓ Stopping condition test passed")
-
-
-def test_agent_with_retry_mechanism():
-    """Test agent retry mechanism"""
-    print("\nTesting agent retry mechanism...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Retry-Test-Agent",
-        llm=model,
-        max_loops=1,
-        retry_attempts=3,
-        retry_interval=1,
-    )
-
-    response = agent.run("Tell me a joke.")
-    assert response is not None, "Retry mechanism test failed"
-    print("✓ Retry mechanism test passed")
-
-
-def test_bulk_and_filtered_operations():
-    """Test bulk operations and response filtering"""
-    print("\nTesting bulk and filtered operations...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Bulk-Filter-Test-Agent", llm=model, max_loops=1
-    )
-
-    # Test bulk run
-    bulk_tasks = [
-        {"task": "What is 2+2?"},
-        {"task": "Name a color"},
-        {"task": "Count to 3"},
-    ]
-    bulk_responses = agent.bulk_run(bulk_tasks)
-    assert len(bulk_responses) == len(
-        bulk_tasks
-    ), "Bulk run should return same number of responses as tasks"
-
-    # Test response filtering
-    agent.add_response_filter("color")
-    filtered_response = agent.filtered_run(
-        "What is your favorite color?"
-    )
-    assert (
-        "[FILTERED]" in filtered_response
-    ), "Response filter not applied"
-
-    print("✓ Bulk and filtered operations test passed")
-
-
-async def test_async_operations():
-    """Test asynchronous operations"""
-    print("\nTesting async operations...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Async-Test-Agent", llm=model, max_loops=1
-    )
-
-    # Test single async run
-    response = await agent.arun("What is 1+1?")
-    assert response is not None, "Async run failed"
-
-    # Test concurrent async runs
-    tasks = ["Task 1", "Task 2", "Task 3"]
-    responses = await asyncio.gather(
-        *[agent.arun(task) for task in tasks]
-    )
-    assert len(responses) == len(
-        tasks
-    ), "Not all async tasks completed"
-
-    print("✓ Async operations test passed")
-
-
-def test_memory_and_state_persistence():
-    """Test memory management and state persistence"""
-    print("\nTesting memory and state persistence...")
-
-    with tempfile.TemporaryDirectory() as temp_dir:
-        state_path = os.path.join(temp_dir, "test_state.json")
-
-        # Create agent with memory configuration
-        model = OpenAIChat(model_name="gpt-4.1")
-        agent1 = Agent(
-            agent_name="Memory-State-Test-Agent",
-            llm=model,
-            max_loops=1,
-            saved_state_path=state_path,
-            context_length=8192,
-            autosave=True,
-        )
-
-        # Test memory operations
-        agent1.add_memory("Important fact: The sky is blue")
-        agent1.memory_query("What color is the sky?")
-
-        # Save state
-        agent1.save()
-
-        # Create new agent and load state
-        agent2 = Agent(
-            agent_name="Memory-State-Test-Agent",
-            llm=model,
-            max_loops=1,
-        )
-        agent2.load(state_path)
-
-        # Verify memory persistence
-        memory_content = (
-            agent2.short_memory.return_history_as_string()
-        )
-        assert (
-            "sky is blue" in memory_content
-        ), "Memory not properly persisted"
-
-        print("✓ Memory and state persistence test passed")
-
-
-def test_sentiment_and_evaluation():
-    """Test sentiment analysis and response evaluation"""
-    print("\nTesting sentiment analysis and evaluation...")
-
-    def mock_sentiment_analyzer(text):
-        """Mock sentiment analyzer that returns a score between 0 and 1"""
-        return 0.7 if "positive" in text.lower() else 0.3
-
-    def mock_evaluator(response):
-        """Mock evaluator that checks response quality"""
-        return "GOOD" if len(response) > 10 else "BAD"
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Sentiment-Eval-Test-Agent",
-        llm=model,
-        max_loops=1,
-        sentiment_analyzer=mock_sentiment_analyzer,
-        sentiment_threshold=0.5,
-        evaluator=mock_evaluator,
-    )
-
-    # Test sentiment analysis
-    agent.run("Generate a positive message")
-
-    # Test evaluation
-    agent.run("Generate a detailed response")
-
-    print("✓ Sentiment and evaluation test passed")
-
-
-def test_tool_management():
-    """Test tool management functionality"""
-    print("\nTesting tool management...")
-
-    def tool1(x: int) -> int:
-        """Sample tool 1"""
-        return x * 2
-
-    def tool2(x: int) -> int:
-        """Sample tool 2"""
-        return x + 2
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Tool-Test-Agent",
-        llm=model,
-        max_loops=1,
-        tools=[tool1],
-    )
-
-    # Test adding tools
-    agent.add_tool(tool2)
-    assert len(agent.tools) == 2, "Tool not added correctly"
-
-    # Test removing tools
-    agent.remove_tool(tool1)
-    assert len(agent.tools) == 1, "Tool not removed correctly"
-
-    # Test adding multiple tools
-    agent.add_tools([tool1, tool2])
-    assert len(agent.tools) == 3, "Multiple tools not added correctly"
-
-    print("✓ Tool management test passed")
-
-
-def test_system_prompt_and_configuration():
-    """Test system prompt and configuration updates"""
-    print("\nTesting system prompt and configuration...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Config-Test-Agent", llm=model, max_loops=1
-    )
-
-    # Test updating system prompt
-    new_prompt = "You are a helpful assistant."
-    agent.update_system_prompt(new_prompt)
-    assert (
-        agent.system_prompt == new_prompt
-    ), "System prompt not updated"
-
-    # Test configuration updates
-    agent.update_max_loops(5)
-    assert agent.max_loops == 5, "Max loops not updated"
-
-    agent.update_loop_interval(2)
-    assert agent.loop_interval == 2, "Loop interval not updated"
-
-    # Test configuration export
-    config_dict = agent.to_dict()
-    assert isinstance(
-        config_dict, dict
-    ), "Configuration export failed"
-
-    print("✓ System prompt and configuration test passed")
-
-
-def test_agent_with_dynamic_temperature():
-    """Test agent with dynamic temperature"""
-    print("\nTesting agent with dynamic temperature...")
-
-    model = OpenAIChat(model_name="gpt-4.1")
-    agent = Agent(
-        agent_name="Dynamic-Temp-Agent",
-        llm=model,
-        max_loops=2,
-        dynamic_temperature_enabled=True,
-    )
-
-    response = agent.run("Generate a creative story.")
-    assert response is not None, "Dynamic temperature test failed"
-    print("✓ Dynamic temperature test passed")
-
-
-def run_all_tests():
-    """Run all test functions"""
-    print("Starting Extended Agent functional tests...\n")
-
-    test_functions = [
-        test_basic_agent_functionality,
-        test_memory_management,
-        test_agent_output_formats,
-        test_agent_state_management,
-        test_agent_tools_and_execution,
-        test_agent_concurrent_execution,
-        test_agent_error_handling,
-        test_agent_configuration,
-        test_agent_with_stopping_condition,
-        test_agent_with_retry_mechanism,
-        test_agent_with_dynamic_temperature,
-        test_bulk_and_filtered_operations,
-        test_memory_and_state_persistence,
-        test_sentiment_and_evaluation,
-        test_tool_management,
-        test_system_prompt_and_configuration,
-    ]
-
-    # Run synchronous tests
-    total_tests = len(test_functions) + 1  # +1 for async test
-    passed_tests = 0
-
-    for test in test_functions:
-        try:
-            test()
-            passed_tests += 1
-        except Exception as e:
-            print(f"✗ Test {test.__name__} failed: {str(e)}")
-
-    # Run async test
-    try:
-        asyncio.run(test_async_operations())
-        passed_tests += 1
-    except Exception as e:
-        print(f"✗ Async operations test failed: {str(e)}")
-
-    print("\nExtended Test Summary:")
-    print(f"Total Tests: {total_tests}")
-    print(f"Passed: {passed_tests}")
-    print(f"Failed: {total_tests - passed_tests}")
-    print(f"Success Rate: {(passed_tests/total_tests)*100:.2f}%")
-
-
-if __name__ == "__main__":
-    run_all_tests()
diff --git a/tests/structs/test_agentrearrange.py b/tests/structs/test_agentrearrange.py
deleted file mode 100644
index 42c3c1bc..00000000
--- a/tests/structs/test_agentrearrange.py
+++ /dev/null
@@ -1,328 +0,0 @@
-import os
-import traceback
-from datetime import datetime
-from typing import Callable, Dict, List, Optional
-
-from loguru import logger
-from swarm_models import OpenAIChat
-
-from swarms.structs.agent import Agent
-from swarms.structs.agent_rearrange import AgentRearrange
-
-
-class TestResult:
-    """Class to store test results and metadata"""
-
-    def __init__(self, test_name: str):
-        self.test_name = test_name
-        self.start_time = datetime.now()
-        self.end_time = None
-        self.success = False
-        self.error = None
-        self.traceback = None
-        self.function_output = None
-
-    def complete(
-        self, success: bool, error: Optional[Exception] = None
-    ):
-        """Complete the test execution with results"""
-        self.end_time = datetime.now()
-        self.success = success
-        if error:
-            self.error = str(error)
-            self.traceback = traceback.format_exc()
-
-    def duration(self) -> float:
-        """Calculate test duration in seconds"""
-        if self.end_time:
-            return (self.end_time - self.start_time).total_seconds()
-        return 0
-
-
-def run_test(test_func: Callable) -> TestResult:
-    """
-    Decorator to run tests with error handling and logging
-
-    Args:
-        test_func (Callable): Test function to execute
-
-    Returns:
-        TestResult: Object containing test execution details
-    """
-
-    def wrapper(*args, **kwargs) -> TestResult:
-        result = TestResult(test_func.__name__)
-        logger.info(
-            f"\n{'='*20} Running test: {test_func.__name__} {'='*20}"
-        )
-
-        try:
-            output = test_func(*args, **kwargs)
-            result.function_output = output
-            result.complete(success=True)
-            logger.success(
-                f"✅ Test {test_func.__name__} passed successfully"
-            )
-
-        except Exception as e:
-            result.complete(success=False, error=e)
-            logger.error(
-                f"❌ Test {test_func.__name__} failed with error: {str(e)}"
-            )
-            logger.error(f"Traceback: {traceback.format_exc()}")
-
-        logger.info(
-            f"Test duration: {result.duration():.2f} seconds\n"
-        )
-        return result
-
-    return wrapper
-
-
-def create_functional_agents() -> List[Agent]:
-    """
-    Create a list of functional agents with real LLM integration for testing.
-    Using OpenAI's GPT model for realistic agent behavior testing.
-    """
-    # Initialize OpenAI Chat model
-    api_key = os.getenv("OPENAI_API_KEY")
-    if not api_key:
-        logger.warning(
-            "No OpenAI API key found. Using mock agents instead."
-        )
-        return [
-            create_mock_agent("TestAgent1"),
-            create_mock_agent("TestAgent2"),
-        ]
-
-    try:
-        model = OpenAIChat(
-            api_key=api_key, model_name="gpt-4.1", temperature=0.1
-        )
-
-        # Create boss agent
-        boss_agent = Agent(
-            agent_name="BossAgent",
-            system_prompt="""
-            You are the BossAgent responsible for managing and overseeing test scenarios.
-            Your role is to coordinate tasks between agents and ensure efficient collaboration.
-            Analyze inputs, break down tasks, and provide clear directives to other agents.
-            Maintain a structured approach to task management and result compilation.
-            """,
-            llm=model,
-            max_loops=1,
-            dashboard=False,
-            streaming_on=True,
-            verbose=True,
-            stopping_token="<DONE>",
-            state_save_file_type="json",
-            saved_state_path="test_boss_agent.json",
-        )
-
-        # Create analysis agent
-        analysis_agent = Agent(
-            agent_name="AnalysisAgent",
-            system_prompt="""
-            You are the AnalysisAgent responsible for detailed data processing and analysis.
-            Your role is to examine input data, identify patterns, and provide analytical insights.
-            Focus on breaking down complex information into clear, actionable components.
-            """,
-            llm=model,
-            max_loops=1,
-            dashboard=False,
-            streaming_on=True,
-            verbose=True,
-            stopping_token="<DONE>",
-            state_save_file_type="json",
-            saved_state_path="test_analysis_agent.json",
-        )
-
-        # Create summary agent
-        summary_agent = Agent(
-            agent_name="SummaryAgent",
-            system_prompt="""
-            You are the SummaryAgent responsible for consolidating and summarizing information.
-            Your role is to take detailed analysis and create concise, actionable summaries.
-            Focus on highlighting key points and ensuring clarity in communication.
-            """,
-            llm=model,
-            max_loops=1,
-            dashboard=False,
-            streaming_on=True,
-            verbose=True,
-            stopping_token="<DONE>",
-            state_save_file_type="json",
-            saved_state_path="test_summary_agent.json",
-        )
-
-        logger.info(
-            "Successfully created functional agents with LLM integration"
-        )
-        return [boss_agent, analysis_agent, summary_agent]
-
-    except Exception as e:
-        logger.error(f"Failed to create functional agents: {str(e)}")
-        logger.warning("Falling back to mock agents")
-        return [
-            create_mock_agent("TestAgent1"),
-            create_mock_agent("TestAgent2"),
-        ]
-
-
-def create_mock_agent(name: str) -> Agent:
-    """Create a mock agent for testing when LLM integration is not available"""
-    return Agent(
-        agent_name=name,
-        system_prompt=f"You are a test agent named {name}",
-        llm=None,
-    )
-
-
-@run_test
-def test_init():
-    """Test AgentRearrange initialization with functional agents"""
-    logger.info("Creating agents for initialization test")
-    agents = create_functional_agents()
-
-    rearrange = AgentRearrange(
-        name="TestRearrange",
-        agents=agents,
-        flow=f"{agents[0].agent_name} -> {agents[1].agent_name} -> {agents[2].agent_name}",
-    )
-
-    assert rearrange.name == "TestRearrange"
-    assert len(rearrange.agents) == 3
-    assert (
-        rearrange.flow
-        == f"{agents[0].agent_name} -> {agents[1].agent_name} -> {agents[2].agent_name}"
-    )
-
-    logger.info(
-        f"Initialized AgentRearrange with {len(agents)} agents"
-    )
-    return True
-
-
-@run_test
-def test_validate_flow():
-    """Test flow validation logic"""
-    agents = create_functional_agents()
-    rearrange = AgentRearrange(
-        agents=agents,
-        flow=f"{agents[0].agent_name} -> {agents[1].agent_name}",
-    )
-
-    logger.info("Testing valid flow pattern")
-    valid = rearrange.validate_flow()
-    assert valid is True
-
-    logger.info("Testing invalid flow pattern")
-    rearrange.flow = f"{agents[0].agent_name} {agents[1].agent_name}"  # Missing arrow
-    try:
-        rearrange.validate_flow()
-        assert False, "Should have raised ValueError"
-    except ValueError as e:
-        logger.info(
-            f"Successfully caught invalid flow error: {str(e)}"
-        )
-        assert True
-
-    return True
-
-
-@run_test
-def test_add_remove_agent():
-    """Test adding and removing agents from the swarm"""
-    agents = create_functional_agents()
-    rearrange = AgentRearrange(
-        agents=agents[:2]
-    )  # Start with first two agents
-
-    logger.info("Testing agent addition")
-    new_agent = agents[2]  # Use the third agent as new agent
-    rearrange.add_agent(new_agent)
-    assert new_agent.agent_name in rearrange.agents
-
-    logger.info("Testing agent removal")
-    rearrange.remove_agent(new_agent.agent_name)
-    assert new_agent.agent_name not in rearrange.agents
-
-    return True
-
-
-@run_test
-def test_basic_run():
-    """Test basic task execution with the swarm"""
-    agents = create_functional_agents()
-    rearrange = AgentRearrange(
-        name="TestSwarm",
-        agents=agents,
-        flow=f"{agents[0].agent_name} -> {agents[1].agent_name} -> {agents[2].agent_name}",
-        max_loops=1,
-    )
-
-    test_task = (
-        "Analyze this test message and provide a brief summary."
-    )
-    logger.info(f"Running test task: {test_task}")
-
-    try:
-        result = rearrange.run(test_task)
-        assert result is not None
-        logger.info(
-            f"Successfully executed task with result length: {len(str(result))}"
-        )
-        return True
-    except Exception as e:
-        logger.error(f"Task execution failed: {str(e)}")
-        raise
-
-
-def run_all_tests() -> Dict[str, TestResult]:
-    """
-    Run all test cases and collect results
-
-    Returns:
-        Dict[str, TestResult]: Dictionary mapping test names to their results
-    """
-    logger.info("\n🚀 Starting AgentRearrange test suite execution")
-    test_functions = [
-        test_init,
-        test_validate_flow,
-        test_add_remove_agent,
-        test_basic_run,
-    ]
-
-    results = {}
-    for test in test_functions:
-        result = test()
-        results[test.__name__] = result
-
-    # Log summary
-    total_tests = len(results)
-    passed_tests = sum(1 for r in results.values() if r.success)
-    failed_tests = total_tests - passed_tests
-
-    logger.info("\n📊 Test Suite Summary:")
-    logger.info(f"Total Tests: {total_tests}")
-    print(f"✅ Passed: {passed_tests}")
-
-    if failed_tests > 0:
-        logger.error(f"❌ Failed: {failed_tests}")
-
-    # Detailed failure information
-    if failed_tests > 0:
-        logger.error("\n❌ Failed Tests Details:")
-        for name, result in results.items():
-            if not result.success:
-                logger.error(f"\n{name}:")
-                logger.error(f"Error: {result.error}")
-                logger.error(f"Traceback: {result.traceback}")
-
-    return results
-
-
-if __name__ == "__main__":
-    print("🌟 Starting AgentRearrange Test Suite")
-    results = run_all_tests()
-    print("🏁 Test Suite Execution Completed")
diff --git a/tests/structs/test_airflow_swarm.py b/tests/structs/test_airflow_swarm.py
deleted file mode 100644
index 0fdfeb20..00000000
--- a/tests/structs/test_airflow_swarm.py
+++ /dev/null
@@ -1,313 +0,0 @@
-import time
-
-from loguru import logger
-from swarms import Agent
-
-from experimental.airflow_swarm import (
-    AirflowDAGSwarm,
-    NodeType,
-    Conversation,
-)
-
-# Configure logger
-logger.remove()
-logger.add(lambda msg: print(msg, end=""), level="DEBUG")
-
-
-def test_swarm_initialization():
-    """Test basic swarm initialization and configuration."""
-    try:
-        swarm = AirflowDAGSwarm(
-            dag_id="test_dag",
-            name="Test DAG",
-            initial_message="Test message",
-        )
-        assert swarm.dag_id == "test_dag", "DAG ID not set correctly"
-        assert swarm.name == "Test DAG", "Name not set correctly"
-        assert (
-            len(swarm.nodes) == 0
-        ), "Nodes should be empty on initialization"
-        assert (
-            len(swarm.edges) == 0
-        ), "Edges should be empty on initialization"
-
-        # Test initial message
-        conv_json = swarm.get_conversation_history()
-        assert (
-            "Test message" in conv_json
-        ), "Initial message not set correctly"
-        print("✅ Swarm initialization test passed")
-        return True
-    except AssertionError as e:
-        print(f"❌ Swarm initialization test failed: {str(e)}")
-        return False
-
-
-def test_node_addition():
-    """Test adding different types of nodes to the swarm."""
-    try:
-        swarm = AirflowDAGSwarm(dag_id="test_dag")
-
-        # Test adding an agent node
-        agent = Agent(
-            agent_name="Test-Agent",
-            system_prompt="Test prompt",
-            model_name="gpt-4o-mini",
-            max_loops=1,
-        )
-        agent_id = swarm.add_node(
-            "test_agent",
-            agent,
-            NodeType.AGENT,
-            query="Test query",
-            concurrent=True,
-        )
-        assert (
-            agent_id == "test_agent"
-        ), "Agent node ID not returned correctly"
-        assert (
-            "test_agent" in swarm.nodes
-        ), "Agent node not added to nodes dict"
-
-        # Test adding a callable node
-        def test_callable(x: int, conversation: Conversation) -> str:
-            return f"Test output {x}"
-
-        callable_id = swarm.add_node(
-            "test_callable",
-            test_callable,
-            NodeType.CALLABLE,
-            args=[42],
-            concurrent=False,
-        )
-        assert (
-            callable_id == "test_callable"
-        ), "Callable node ID not returned correctly"
-        assert (
-            "test_callable" in swarm.nodes
-        ), "Callable node not added to nodes dict"
-
-        print("✅ Node addition test passed")
-        return True
-    except AssertionError as e:
-        print(f"❌ Node addition test failed: {str(e)}")
-        return False
-    except Exception as e:
-        print(
-            f"❌ Node addition test failed with unexpected error: {str(e)}"
-        )
-        return False
-
-
-def test_edge_addition():
-    """Test adding edges between nodes."""
-    try:
-        swarm = AirflowDAGSwarm(dag_id="test_dag")
-
-        # Add two nodes
-        def node1_fn(conversation: Conversation) -> str:
-            return "Node 1 output"
-
-        def node2_fn(conversation: Conversation) -> str:
-            return "Node 2 output"
-
-        swarm.add_node("node1", node1_fn, NodeType.CALLABLE)
-        swarm.add_node("node2", node2_fn, NodeType.CALLABLE)
-
-        # Add edge between them
-        swarm.add_edge("node1", "node2")
-
-        assert (
-            "node2" in swarm.edges["node1"]
-        ), "Edge not added correctly"
-        assert (
-            len(swarm.edges["node1"]) == 1
-        ), "Incorrect number of edges"
-
-        # Test adding edge with non-existent node
-        try:
-            swarm.add_edge("node1", "non_existent")
-            assert (
-                False
-            ), "Should raise ValueError for non-existent node"
-        except ValueError:
-            pass
-
-        print("✅ Edge addition test passed")
-        return True
-    except AssertionError as e:
-        print(f"❌ Edge addition test failed: {str(e)}")
-        return False
-
-
-def test_execution_order():
-    """Test that nodes are executed in the correct order based on dependencies."""
-    try:
-        swarm = AirflowDAGSwarm(dag_id="test_dag")
-        execution_order = []
-
-        def node1(conversation: Conversation) -> str:
-            execution_order.append("node1")
-            return "Node 1 output"
-
-        def node2(conversation: Conversation) -> str:
-            execution_order.append("node2")
-            return "Node 2 output"
-
-        def node3(conversation: Conversation) -> str:
-            execution_order.append("node3")
-            return "Node 3 output"
-
-        # Add nodes
-        swarm.add_node(
-            "node1", node1, NodeType.CALLABLE, concurrent=False
-        )
-        swarm.add_node(
-            "node2", node2, NodeType.CALLABLE, concurrent=False
-        )
-        swarm.add_node(
-            "node3", node3, NodeType.CALLABLE, concurrent=False
-        )
-
-        # Add edges to create a chain: node1 -> node2 -> node3
-        swarm.add_edge("node1", "node2")
-        swarm.add_edge("node2", "node3")
-
-        # Execute
-        swarm.run()
-
-        # Check execution order
-        assert execution_order == [
-            "node1",
-            "node2",
-            "node3",
-        ], "Incorrect execution order"
-        print("✅ Execution order test passed")
-        return True
-    except AssertionError as e:
-        print(f"❌ Execution order test failed: {str(e)}")
-        return False
-
-
-def test_concurrent_execution():
-    """Test concurrent execution of nodes."""
-    try:
-        swarm = AirflowDAGSwarm(dag_id="test_dag")
-
-        def slow_node1(conversation: Conversation) -> str:
-            time.sleep(0.5)
-            return "Slow node 1 output"
-
-        def slow_node2(conversation: Conversation) -> str:
-            time.sleep(0.5)
-            return "Slow node 2 output"
-
-        # Add nodes with concurrent=True
-        swarm.add_node(
-            "slow1", slow_node1, NodeType.CALLABLE, concurrent=True
-        )
-        swarm.add_node(
-            "slow2", slow_node2, NodeType.CALLABLE, concurrent=True
-        )
-
-        # Measure execution time
-        start_time = time.time()
-        swarm.run()
-        execution_time = time.time() - start_time
-
-        # Should take ~0.5s for concurrent execution, not ~1s
-        assert (
-            execution_time < 0.8
-        ), "Concurrent execution took too long"
-        print("✅ Concurrent execution test passed")
-        return True
-    except AssertionError as e:
-        print(f"❌ Concurrent execution test failed: {str(e)}")
-        return False
-
-
-def test_conversation_handling():
-    """Test conversation management within the swarm."""
-    try:
-        swarm = AirflowDAGSwarm(
-            dag_id="test_dag", initial_message="Initial test message"
-        )
-
-        # Test adding user messages
-        swarm.add_user_message("Test message 1")
-        swarm.add_user_message("Test message 2")
-
-        history = swarm.get_conversation_history()
-        assert (
-            "Initial test message" in history
-        ), "Initial message not in history"
-        assert (
-            "Test message 1" in history
-        ), "First message not in history"
-        assert (
-            "Test message 2" in history
-        ), "Second message not in history"
-
-        print("✅ Conversation handling test passed")
-        return True
-    except AssertionError as e:
-        print(f"❌ Conversation handling test failed: {str(e)}")
-        return False
-
-
-def test_error_handling():
-    """Test error handling in node execution."""
-    try:
-        swarm = AirflowDAGSwarm(dag_id="test_dag")
-
-        def failing_node(conversation: Conversation) -> str:
-            raise ValueError("Test error")
-
-        swarm.add_node("failing", failing_node, NodeType.CALLABLE)
-
-        # Execute should not raise an exception
-        result = swarm.run()
-
-        assert (
-            "Error" in result
-        ), "Error not captured in execution result"
-        assert (
-            "Test error" in result
-        ), "Specific error message not captured"
-
-        print("✅ Error handling test passed")
-        return True
-    except Exception as e:
-        print(f"❌ Error handling test failed: {str(e)}")
-        return False
-
-
-def run_all_tests():
-    """Run all test functions and report results."""
-    tests = [
-        test_swarm_initialization,
-        test_node_addition,
-        test_edge_addition,
-        test_execution_order,
-        test_concurrent_execution,
-        test_conversation_handling,
-        test_error_handling,
-    ]
-
-    results = []
-    for test in tests:
-        print(f"\nRunning {test.__name__}...")
-        result = test()
-        results.append(result)
-
-    total = len(results)
-    passed = sum(results)
-    print("\n=== Test Results ===")
-    print(f"Total tests: {total}")
-    print(f"Passed: {passed}")
-    print(f"Failed: {total - passed}")
-    print("==================")
-
-
-if __name__ == "__main__":
-    run_all_tests()
diff --git a/tests/structs/test_auto_swarm_builder_fix.py b/tests/structs/test_auto_swarm_builder_fix.py
deleted file mode 100644
index 420c1892..00000000
--- a/tests/structs/test_auto_swarm_builder_fix.py
+++ /dev/null
@@ -1,293 +0,0 @@
-"""
-Tests for bug #1115 fix in AutoSwarmBuilder.
-
-This test module verifies the fix for AttributeError when creating agents
-from AgentSpec Pydantic models in AutoSwarmBuilder.
-
-Bug: https://github.com/kyegomez/swarms/issues/1115
-"""
-
-import pytest
-
-from swarms.structs.agent import Agent
-from swarms.structs.auto_swarm_builder import (
-    AgentSpec,
-    AutoSwarmBuilder,
-)
-from swarms.structs.ma_utils import set_random_models_for_agents
-
-
-class TestAutoSwarmBuilderFix:
-    """Tests for bug #1115 fix in AutoSwarmBuilder."""
-
-    def test_create_agents_from_specs_with_dict(self):
-        """Test that create_agents_from_specs handles dict input correctly."""
-        builder = AutoSwarmBuilder()
-
-        # Create specs as a dictionary
-        specs = {
-            "agents": [
-                {
-                    "agent_name": "test_agent_1",
-                    "description": "Test agent 1 description",
-                    "system_prompt": "You are a helpful assistant",
-                    "model_name": "gpt-4o-mini",
-                    "max_loops": 1,
-                }
-            ]
-        }
-
-        agents = builder.create_agents_from_specs(specs)
-
-        # Verify agents were created correctly
-        assert len(agents) == 1
-        assert isinstance(agents[0], Agent)
-        assert agents[0].agent_name == "test_agent_1"
-
-        # Verify description was mapped to agent_description
-        assert hasattr(agents[0], "agent_description")
-        assert (
-            agents[0].agent_description == "Test agent 1 description"
-        )
-
-    def test_create_agents_from_specs_with_pydantic(self):
-        """Test that create_agents_from_specs handles Pydantic model input correctly.
-
-        This is the main test for bug #1115 - it verifies that AgentSpec
-        Pydantic models can be unpacked correctly.
-        """
-        builder = AutoSwarmBuilder()
-
-        # Create specs as Pydantic AgentSpec objects
-        agent_spec = AgentSpec(
-            agent_name="test_agent_pydantic",
-            description="Pydantic test agent",
-            system_prompt="You are a helpful assistant",
-            model_name="gpt-4o-mini",
-            max_loops=1,
-        )
-
-        specs = {"agents": [agent_spec]}
-
-        agents = builder.create_agents_from_specs(specs)
-
-        # Verify agents were created correctly
-        assert len(agents) == 1
-        assert isinstance(agents[0], Agent)
-        assert agents[0].agent_name == "test_agent_pydantic"
-
-        # Verify description was mapped to agent_description
-        assert hasattr(agents[0], "agent_description")
-        assert agents[0].agent_description == "Pydantic test agent"
-
-    def test_parameter_name_mapping(self):
-        """Test that 'description' field maps to 'agent_description' correctly."""
-        builder = AutoSwarmBuilder()
-
-        # Test with dict that has 'description'
-        specs = {
-            "agents": [
-                {
-                    "agent_name": "mapping_test",
-                    "description": "This should map to agent_description",
-                    "system_prompt": "You are helpful",
-                }
-            ]
-        }
-
-        agents = builder.create_agents_from_specs(specs)
-
-        assert len(agents) == 1
-        agent = agents[0]
-
-        # Verify description was mapped
-        assert hasattr(agent, "agent_description")
-        assert (
-            agent.agent_description
-            == "This should map to agent_description"
-        )
-
-    def test_create_agents_from_specs_mixed_input(self):
-        """Test that create_agents_from_specs handles mixed dict and Pydantic input."""
-        builder = AutoSwarmBuilder()
-
-        # Mix of dict and Pydantic objects
-        dict_spec = {
-            "agent_name": "dict_agent",
-            "description": "Dict agent description",
-            "system_prompt": "You are helpful",
-        }
-
-        pydantic_spec = AgentSpec(
-            agent_name="pydantic_agent",
-            description="Pydantic agent description",
-            system_prompt="You are smart",
-        )
-
-        specs = {"agents": [dict_spec, pydantic_spec]}
-
-        agents = builder.create_agents_from_specs(specs)
-
-        # Verify both agents were created
-        assert len(agents) == 2
-        assert all(isinstance(agent, Agent) for agent in agents)
-
-        # Verify both have correct descriptions
-        dict_agent = next(
-            a for a in agents if a.agent_name == "dict_agent"
-        )
-        pydantic_agent = next(
-            a for a in agents if a.agent_name == "pydantic_agent"
-        )
-
-        assert (
-            dict_agent.agent_description == "Dict agent description"
-        )
-        assert (
-            pydantic_agent.agent_description
-            == "Pydantic agent description"
-        )
-
-    def test_set_random_models_for_agents_with_valid_agents(
-        self,
-    ):
-        """Test set_random_models_for_agents with proper Agent objects."""
-        # Create proper Agent objects
-        agents = [
-            Agent(
-                agent_name="agent1",
-                system_prompt="You are agent 1",
-                max_loops=1,
-            ),
-            Agent(
-                agent_name="agent2",
-                system_prompt="You are agent 2",
-                max_loops=1,
-            ),
-        ]
-
-        # Set random models
-        model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
-        result = set_random_models_for_agents(
-            agents=agents, model_names=model_names
-        )
-
-        # Verify results
-        assert len(result) == 2
-        assert all(isinstance(agent, Agent) for agent in result)
-        assert all(hasattr(agent, "model_name") for agent in result)
-        assert all(
-            agent.model_name in model_names for agent in result
-        )
-
-    def test_set_random_models_for_agents_with_single_agent(
-        self,
-    ):
-        """Test set_random_models_for_agents with a single agent."""
-        agent = Agent(
-            agent_name="single_agent",
-            system_prompt="You are helpful",
-            max_loops=1,
-        )
-
-        model_names = ["gpt-4o-mini", "gpt-4o"]
-        result = set_random_models_for_agents(
-            agents=agent, model_names=model_names
-        )
-
-        assert isinstance(result, Agent)
-        assert hasattr(result, "model_name")
-        assert result.model_name in model_names
-
-    def test_set_random_models_for_agents_with_none(self):
-        """Test set_random_models_for_agents with None returns random model name."""
-        model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
-        result = set_random_models_for_agents(
-            agents=None, model_names=model_names
-        )
-
-        assert isinstance(result, str)
-        assert result in model_names
-
-    @pytest.mark.skip(
-        reason="This test requires API key and makes LLM calls"
-    )
-    def test_auto_swarm_builder_return_agents_objects_integration(
-        self,
-    ):
-        """Integration test for AutoSwarmBuilder with execution_type='return-agents-objects'.
-
-        This test requires OPENAI_API_KEY and makes actual LLM calls.
-        Run manually with: pytest -k test_auto_swarm_builder_return_agents_objects_integration -v
-        """
-        builder = AutoSwarmBuilder(
-            execution_type="return-agents-objects",
-            model_name="gpt-4o-mini",
-            max_loops=1,
-            verbose=False,
-        )
-
-        agents = builder.run(
-            "Create a team of 2 data analysis agents with specific roles"
-        )
-
-        # Verify agents were created
-        assert isinstance(agents, list)
-        assert len(agents) >= 1
-        assert all(isinstance(agent, Agent) for agent in agents)
-        assert all(hasattr(agent, "agent_name") for agent in agents)
-        assert all(
-            hasattr(agent, "agent_description") for agent in agents
-        )
-
-    def test_agent_spec_to_agent_all_fields(self):
-        """Test that all AgentSpec fields are properly passed to Agent."""
-        builder = AutoSwarmBuilder()
-
-        agent_spec = AgentSpec(
-            agent_name="full_test_agent",
-            description="Full test description",
-            system_prompt="You are a comprehensive test agent",
-            model_name="gpt-4o-mini",
-            auto_generate_prompt=False,
-            max_tokens=4096,
-            temperature=0.7,
-            role="worker",
-            max_loops=3,
-            goal="Test all parameters",
-        )
-
-        agents = builder.create_agents_from_specs(
-            {"agents": [agent_spec]}
-        )
-
-        assert len(agents) == 1
-        agent = agents[0]
-
-        # Verify all fields were set
-        assert agent.agent_name == "full_test_agent"
-        assert agent.agent_description == "Full test description"
-        # Agent may modify system_prompt by adding additional instructions
-        assert (
-            "You are a comprehensive test agent"
-            in agent.system_prompt
-        )
-        assert agent.max_loops == 3
-        assert agent.max_tokens == 4096
-        assert agent.temperature == 0.7
-
-    def test_create_agents_from_specs_empty_list(self):
-        """Test that create_agents_from_specs handles empty agent list."""
-        builder = AutoSwarmBuilder()
-
-        specs = {"agents": []}
-
-        agents = builder.create_agents_from_specs(specs)
-
-        assert isinstance(agents, list)
-        assert len(agents) == 0
-
-
-if __name__ == "__main__":
-    # Run tests with pytest
-    pytest.main([__file__, "-v", "--tb=short"])
diff --git a/tests/structs/test_auto_swarms_builder.py b/tests/structs/test_auto_swarms_builder.py
index 4d690678..2bf0d005 100644
--- a/tests/structs/test_auto_swarms_builder.py
+++ b/tests/structs/test_auto_swarms_builder.py
@@ -1,10 +1,18 @@
-from swarms.structs.auto_swarm_builder import AutoSwarmBuilder
+import pytest
 from dotenv import load_dotenv
 
+from swarms.structs.agent import Agent
+from swarms.structs.auto_swarm_builder import (
+    AgentSpec,
+    AutoSwarmBuilder,
+)
+from swarms.structs.ma_utils import set_random_models_for_agents
+
 load_dotenv()
 
 
 def print_separator():
+    """Print a separator line for test output formatting."""
     print("\n" + "=" * 50)
 
 
@@ -194,5 +202,271 @@ def run_all_tests():
         raise
 
 
+# Bug Fix Tests (from test_auto_swarm_builder_fix.py)
+class TestAutoSwarmBuilderFix:
+    """Tests for bug #1115 fix in AutoSwarmBuilder."""
+
+    def test_create_agents_from_specs_with_dict(self):
+        """Test that create_agents_from_specs handles dict input correctly."""
+        builder = AutoSwarmBuilder()
+
+        # Create specs as a dictionary
+        specs = {
+            "agents": [
+                {
+                    "agent_name": "test_agent_1",
+                    "description": "Test agent 1 description",
+                    "system_prompt": "You are a helpful assistant",
+                    "model_name": "gpt-4o-mini",
+                    "max_loops": 1,
+                }
+            ]
+        }
+
+        agents = builder.create_agents_from_specs(specs)
+
+        # Verify agents were created correctly
+        assert len(agents) == 1
+        assert isinstance(agents[0], Agent)
+        assert agents[0].agent_name == "test_agent_1"
+
+        # Verify description was mapped to agent_description
+        assert hasattr(agents[0], "agent_description")
+        assert (
+            agents[0].agent_description == "Test agent 1 description"
+        )
+
+    def test_create_agents_from_specs_with_pydantic(self):
+        """Test that create_agents_from_specs handles Pydantic model input correctly.
+
+        This is the main test for bug #1115 - it verifies that AgentSpec
+        Pydantic models can be unpacked correctly.
+        """
+        builder = AutoSwarmBuilder()
+
+        # Create specs as Pydantic AgentSpec objects
+        agent_spec = AgentSpec(
+            agent_name="test_agent_pydantic",
+            description="Pydantic test agent",
+            system_prompt="You are a helpful assistant",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+        )
+
+        specs = {"agents": [agent_spec]}
+
+        agents = builder.create_agents_from_specs(specs)
+
+        # Verify agents were created correctly
+        assert len(agents) == 1
+        assert isinstance(agents[0], Agent)
+        assert agents[0].agent_name == "test_agent_pydantic"
+
+        # Verify description was mapped to agent_description
+        assert hasattr(agents[0], "agent_description")
+        assert agents[0].agent_description == "Pydantic test agent"
+
+    def test_parameter_name_mapping(self):
+        """Test that 'description' field maps to 'agent_description' correctly."""
+        builder = AutoSwarmBuilder()
+
+        # Test with dict that has 'description'
+        specs = {
+            "agents": [
+                {
+                    "agent_name": "mapping_test",
+                    "description": "This should map to agent_description",
+                    "system_prompt": "You are helpful",
+                }
+            ]
+        }
+
+        agents = builder.create_agents_from_specs(specs)
+
+        assert len(agents) == 1
+        agent = agents[0]
+
+        # Verify description was mapped
+        assert hasattr(agent, "agent_description")
+        assert (
+            agent.agent_description
+            == "This should map to agent_description"
+        )
+
+    def test_create_agents_from_specs_mixed_input(self):
+        """Test that create_agents_from_specs handles mixed dict and Pydantic input."""
+        builder = AutoSwarmBuilder()
+
+        # Mix of dict and Pydantic objects
+        dict_spec = {
+            "agent_name": "dict_agent",
+            "description": "Dict agent description",
+            "system_prompt": "You are helpful",
+        }
+
+        pydantic_spec = AgentSpec(
+            agent_name="pydantic_agent",
+            description="Pydantic agent description",
+            system_prompt="You are smart",
+        )
+
+        specs = {"agents": [dict_spec, pydantic_spec]}
+
+        agents = builder.create_agents_from_specs(specs)
+
+        # Verify both agents were created
+        assert len(agents) == 2
+        assert all(isinstance(agent, Agent) for agent in agents)
+
+        # Verify both have correct descriptions
+        dict_agent = next(
+            a for a in agents if a.agent_name == "dict_agent"
+        )
+        pydantic_agent = next(
+            a for a in agents if a.agent_name == "pydantic_agent"
+        )
+
+        assert (
+            dict_agent.agent_description == "Dict agent description"
+        )
+        assert (
+            pydantic_agent.agent_description
+            == "Pydantic agent description"
+        )
+
+    def test_set_random_models_for_agents_with_valid_agents(self):
+        """Test set_random_models_for_agents with proper Agent objects."""
+        # Create proper Agent objects
+        agents = [
+            Agent(
+                agent_name="agent1",
+                system_prompt="You are agent 1",
+                max_loops=1,
+            ),
+            Agent(
+                agent_name="agent2",
+                system_prompt="You are agent 2",
+                max_loops=1,
+            ),
+        ]
+
+        # Set random models
+        model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
+        result = set_random_models_for_agents(
+            agents=agents, model_names=model_names
+        )
+
+        # Verify results
+        assert len(result) == 2
+        assert all(isinstance(agent, Agent) for agent in result)
+        assert all(hasattr(agent, "model_name") for agent in result)
+        assert all(
+            agent.model_name in model_names for agent in result
+        )
+
+    def test_set_random_models_for_agents_with_single_agent(self):
+        """Test set_random_models_for_agents with a single agent."""
+        agent = Agent(
+            agent_name="single_agent",
+            system_prompt="You are helpful",
+            max_loops=1,
+        )
+
+        model_names = ["gpt-4o-mini", "gpt-4o"]
+        result = set_random_models_for_agents(
+            agents=agent, model_names=model_names
+        )
+
+        assert isinstance(result, Agent)
+        assert hasattr(result, "model_name")
+        assert result.model_name in model_names
+
+    def test_set_random_models_for_agents_with_none(self):
+        """Test set_random_models_for_agents with None returns random model name."""
+        model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
+        result = set_random_models_for_agents(
+            agents=None, model_names=model_names
+        )
+
+        assert isinstance(result, str)
+        assert result in model_names
+
+    @pytest.mark.skip(
+        reason="This test requires API key and makes LLM calls"
+    )
+    def test_auto_swarm_builder_return_agents_objects_integration(self):
+        """Integration test for AutoSwarmBuilder with execution_type='return-agents-objects'.
+
+        This test requires OPENAI_API_KEY and makes actual LLM calls.
+        Run manually with: pytest -k test_auto_swarm_builder_return_agents_objects_integration -v
+        """
+        builder = AutoSwarmBuilder(
+            execution_type="return-agents-objects",
+            model_name="gpt-4o-mini",
+            max_loops=1,
+            verbose=False,
+        )
+
+        agents = builder.run(
+            "Create a team of 2 data analysis agents with specific roles"
+        )
+
+        # Verify agents were created
+        assert isinstance(agents, list)
+        assert len(agents) >= 1
+        assert all(isinstance(agent, Agent) for agent in agents)
+        assert all(hasattr(agent, "agent_name") for agent in agents)
+        assert all(
+            hasattr(agent, "agent_description") for agent in agents
+        )
+
+    def test_agent_spec_to_agent_all_fields(self):
+        """Test that all AgentSpec fields are properly passed to Agent."""
+        builder = AutoSwarmBuilder()
+
+        agent_spec = AgentSpec(
+            agent_name="full_test_agent",
+            description="Full test description",
+            system_prompt="You are a comprehensive test agent",
+            model_name="gpt-4o-mini",
+            auto_generate_prompt=False,
+            max_tokens=4096,
+            temperature=0.7,
+            role="worker",
+            max_loops=3,
+            goal="Test all parameters",
+        )
+
+        agents = builder.create_agents_from_specs(
+            {"agents": [agent_spec]}
+        )
+
+        assert len(agents) == 1
+        agent = agents[0]
+
+        # Verify all fields were set
+        assert agent.agent_name == "full_test_agent"
+        assert agent.agent_description == "Full test description"
+        # Agent may modify system_prompt by adding additional instructions
+        assert (
+            "You are a comprehensive test agent"
+            in agent.system_prompt
+        )
+        assert agent.max_loops == 3
+        assert agent.max_tokens == 4096
+        assert agent.temperature == 0.7
+
+    def test_create_agents_from_specs_empty_list(self):
+        """Test that create_agents_from_specs handles empty agent list."""
+        builder = AutoSwarmBuilder()
+
+        specs = {"agents": []}
+
+        agents = builder.create_agents_from_specs(specs)
+
+        assert isinstance(agents, list)
+        assert len(agents) == 0
+
+
 if __name__ == "__main__":
     run_all_tests()
diff --git a/tests/structs/test_base_workflow.py b/tests/structs/test_base_workflow.py
deleted file mode 100644
index fbb8d710..00000000
--- a/tests/structs/test_base_workflow.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import json
-import os
-
-import pytest
-from dotenv import load_dotenv
-
-from swarm_models import OpenAIChat
-from swarms.structs import BaseWorkflow
-
-load_dotenv()
-
-api_key = os.environ.get("OPENAI_API_KEY")
-
-
-def setup_workflow():
-    llm = OpenAIChat(openai_api_key=api_key)
-    workflow = BaseWorkflow(max_loops=1)
-    workflow.add("What's the weather in miami", llm)
-    workflow.add("Create a report on these metrics", llm)
-    workflow.save_workflow_state("workflow_state.json")
-    return workflow
-
-
-def teardown_workflow():
-    os.remove("workflow_state.json")
-
-
-def test_load_workflow_state():
-    workflow = setup_workflow()
-    workflow.load_workflow_state("workflow_state.json")
-    assert workflow.max_loops == 1
-    assert len(workflow.tasks) == 2
-    assert (
-        workflow.tasks[0].description == "What's the weather in miami"
-    )
-    assert (
-        workflow.tasks[1].description
-        == "Create a report on these metrics"
-    )
-    teardown_workflow()
-
-
-def test_load_workflow_state_with_missing_file():
-    workflow = setup_workflow()
-    with pytest.raises(FileNotFoundError):
-        workflow.load_workflow_state("non_existent_file.json")
-    teardown_workflow()
-
-
-def test_load_workflow_state_with_invalid_file():
-    workflow = setup_workflow()
-    with open("invalid_file.json", "w") as f:
-        f.write("This is not valid JSON")
-    with pytest.raises(json.JSONDecodeError):
-        workflow.load_workflow_state("invalid_file.json")
-    os.remove("invalid_file.json")
-    teardown_workflow()
-
-
-def test_load_workflow_state_with_missing_keys():
-    workflow = setup_workflow()
-    with open("missing_keys.json", "w") as f:
-        json.dump({"max_loops": 1}, f)
-    with pytest.raises(KeyError):
-        workflow.load_workflow_state("missing_keys.json")
-    os.remove("missing_keys.json")
-    teardown_workflow()
diff --git a/tests/structs/test_board_of_directors_swarm.py b/tests/structs/test_board_of_directors_swarm.py
index cd85b81e..1865d7d1 100644
--- a/tests/structs/test_board_of_directors_swarm.py
+++ b/tests/structs/test_board_of_directors_swarm.py
@@ -1,1202 +1,220 @@
-"""
-Comprehensive test suite for Board of Directors Swarm.
-
-This module contains extensive tests for the Board of Directors swarm implementation,
-covering all aspects including initialization, board operations, task execution,
-error handling, and performance characteristics.
-
-The test suite follows the Swarms testing philosophy:
-- Comprehensive coverage of all functionality
-- Proper mocking and isolation
-- Performance and integration testing
-- Error handling validation
-"""
-
-import os
-import pytest
-import asyncio
-from unittest.mock import Mock, patch, AsyncMock
-
-from swarms.structs.board_of_directors_swarm import (
-    BoardOfDirectorsSwarm,
-    BoardMember,
-    BoardMemberRole,
-    BoardDecisionType,
-    BoardOrder,
-    BoardDecision,
-    BoardSpec,
-)
-from swarms.structs.agent import Agent
-
-
-# Test fixtures
-@pytest.fixture
-def mock_agent():
-    """Create a mock agent for testing."""
-    agent = Mock(spec=Agent)
-    agent.agent_name = "TestAgent"
-    agent.agent_description = "A test agent for unit testing"
-    agent.run = Mock(return_value="Test agent response")
-    agent.arun = AsyncMock(return_value="Async test agent response")
-    return agent
-
-
-@pytest.fixture
-def mock_board_member(mock_agent):
-    """Create a mock board member for testing."""
-    return BoardMember(
-        agent=mock_agent,
-        role=BoardMemberRole.CHAIRMAN,
-        voting_weight=1.5,
-        expertise_areas=["leadership", "strategy"],
-    )
-
-
-@pytest.fixture
-def sample_agents():
-    """Create sample agents for testing."""
-    agents = []
-    for i in range(3):
-        agent = Mock(spec=Agent)
-        agent.agent_name = f"Agent{i+1}"
-        agent.agent_description = f"Test agent {i+1}"
-        agent.run = Mock(return_value=f"Response from Agent{i+1}")
-        agents.append(agent)
-    return agents
-
-
-@pytest.fixture
-def sample_board_members(sample_agents):
-    """Create sample board members for testing."""
-    roles = [
-        BoardMemberRole.CHAIRMAN,
-        BoardMemberRole.VICE_CHAIRMAN,
-        BoardMemberRole.SECRETARY,
-    ]
-    board_members = []
-
-    for i, (agent, role) in enumerate(zip(sample_agents, roles)):
-        board_member = BoardMember(
-            agent=agent,
-            role=role,
-            voting_weight=1.0 + (i * 0.2),
-            expertise_areas=[f"expertise_{i+1}"],
-        )
-        board_members.append(board_member)
-
-    return board_members
-
-
-@pytest.fixture
-def basic_board_swarm(sample_agents):
-    """Create a basic Board of Directors swarm for testing."""
-    return BoardOfDirectorsSwarm(
-        name="TestBoard",
-        agents=sample_agents,
-        verbose=False,
-        max_loops=1,
-    )
-
-
-@pytest.fixture
-def configured_board_swarm(sample_agents, sample_board_members):
-    """Create a configured Board of Directors swarm for testing."""
-    return BoardOfDirectorsSwarm(
-        name="ConfiguredBoard",
-        description="A configured board for testing",
-        board_members=sample_board_members,
-        agents=sample_agents,
-        max_loops=2,
-        verbose=True,
-        decision_threshold=0.7,
-        enable_voting=True,
-        enable_consensus=True,
-        max_workers=4,
-    )
-
-
-# Unit tests for enums and data models
-class TestBoardMemberRole:
-    """Test BoardMemberRole enum."""
-
-    def test_enum_values(self):
-        """Test that all enum values are correctly defined."""
-        assert BoardMemberRole.CHAIRMAN == "chairman"
-        assert BoardMemberRole.VICE_CHAIRMAN == "vice_chairman"
-        assert BoardMemberRole.SECRETARY == "secretary"
-        assert BoardMemberRole.TREASURER == "treasurer"
-        assert BoardMemberRole.MEMBER == "member"
-        assert (
-            BoardMemberRole.EXECUTIVE_DIRECTOR == "executive_director"
-        )
-
-
-class TestBoardDecisionType:
-    """Test BoardDecisionType enum."""
-
-    def test_enum_values(self):
-        """Test that all enum values are correctly defined."""
-        assert BoardDecisionType.UNANIMOUS == "unanimous"
-        assert BoardDecisionType.MAJORITY == "majority"
-        assert BoardDecisionType.CONSENSUS == "consensus"
-        assert (
-            BoardDecisionType.CHAIRMAN_DECISION == "chairman_decision"
-        )
-
-
-class TestBoardMember:
-    """Test BoardMember dataclass."""
-
-    def test_board_member_creation(self, mock_agent):
-        """Test creating a board member."""
-        board_member = BoardMember(
-            agent=mock_agent,
-            role=BoardMemberRole.CHAIRMAN,
-            voting_weight=1.5,
-            expertise_areas=["leadership", "strategy"],
-        )
-
-        assert board_member.agent == mock_agent
-        assert board_member.role == BoardMemberRole.CHAIRMAN
-        assert board_member.voting_weight == 1.5
-        assert board_member.expertise_areas == [
-            "leadership",
-            "strategy",
-        ]
-
-    def test_board_member_defaults(self, mock_agent):
-        """Test board member with default values."""
-        board_member = BoardMember(
-            agent=mock_agent, role=BoardMemberRole.MEMBER
-        )
-
-        assert board_member.voting_weight == 1.0
-        assert board_member.expertise_areas == []
-
-    def test_board_member_post_init(self, mock_agent):
-        """Test board member post-init with None expertise areas."""
-        board_member = BoardMember(
-            agent=mock_agent,
-            role=BoardMemberRole.MEMBER,
-            expertise_areas=None,
-        )
-
-        assert board_member.expertise_areas == []
-
-
-class TestBoardOrder:
-    """Test BoardOrder model."""
-
-    def test_board_order_creation(self):
-        """Test creating a board order."""
-        order = BoardOrder(
-            agent_name="TestAgent",
-            task="Test task",
-            priority=1,
-            deadline="2024-01-01",
-            assigned_by="Chairman",
-        )
-
-        assert order.agent_name == "TestAgent"
-        assert order.task == "Test task"
-        assert order.priority == 1
-        assert order.deadline == "2024-01-01"
-        assert order.assigned_by == "Chairman"
-
-    def test_board_order_defaults(self):
-        """Test board order with default values."""
-        order = BoardOrder(agent_name="TestAgent", task="Test task")
-
-        assert order.priority == 3
-        assert order.deadline is None
-        assert order.assigned_by == "Board of Directors"
-
-    def test_board_order_validation(self):
-        """Test board order validation."""
-        # Test priority validation
-        with pytest.raises(ValueError):
-            BoardOrder(
-                agent_name="TestAgent",
-                task="Test task",
-                priority=0,  # Invalid priority
-            )
-
-        with pytest.raises(ValueError):
-            BoardOrder(
-                agent_name="TestAgent",
-                task="Test task",
-                priority=6,  # Invalid priority
-            )
-
-
-class TestBoardDecision:
-    """Test BoardDecision model."""
-
-    def test_board_decision_creation(self):
-        """Test creating a board decision."""
-        decision = BoardDecision(
-            decision_type=BoardDecisionType.MAJORITY,
-            decision="Approve the proposal",
-            votes_for=3,
-            votes_against=1,
-            abstentions=0,
-            reasoning="The proposal aligns with our strategic goals",
-        )
-
-        assert decision.decision_type == BoardDecisionType.MAJORITY
-        assert decision.decision == "Approve the proposal"
-        assert decision.votes_for == 3
-        assert decision.votes_against == 1
-        assert decision.abstentions == 0
-        assert (
-            decision.reasoning
-            == "The proposal aligns with our strategic goals"
-        )
-
-    def test_board_decision_defaults(self):
-        """Test board decision with default values."""
-        decision = BoardDecision(
-            decision_type=BoardDecisionType.CONSENSUS,
-            decision="Test decision",
-        )
-
-        assert decision.votes_for == 0
-        assert decision.votes_against == 0
-        assert decision.abstentions == 0
-        assert decision.reasoning == ""
-
-
-class TestBoardSpec:
-    """Test BoardSpec model."""
-
-    def test_board_spec_creation(self):
-        """Test creating a board spec."""
-        orders = [
-            BoardOrder(agent_name="Agent1", task="Task 1"),
-            BoardOrder(agent_name="Agent2", task="Task 2"),
-        ]
-        decisions = [
-            BoardDecision(
-                decision_type=BoardDecisionType.MAJORITY,
-                decision="Decision 1",
-            )
-        ]
-
-        spec = BoardSpec(
-            plan="Test plan",
-            orders=orders,
-            decisions=decisions,
-            meeting_summary="Test meeting summary",
-        )
-
-        assert spec.plan == "Test plan"
-        assert len(spec.orders) == 2
-        assert len(spec.decisions) == 1
-        assert spec.meeting_summary == "Test meeting summary"
-
-    def test_board_spec_defaults(self):
-        """Test board spec with default values."""
-        spec = BoardSpec(plan="Test plan", orders=[])
-
-        assert spec.decisions == []
-        assert spec.meeting_summary == ""
-
-
-# Unit tests for BoardOfDirectorsSwarm
-class TestBoardOfDirectorsSwarmInitialization:
-    """Test BoardOfDirectorsSwarm initialization."""
-
-    def test_basic_initialization(self, sample_agents):
-        """Test basic swarm initialization."""
-        swarm = BoardOfDirectorsSwarm(
-            name="TestSwarm", agents=sample_agents
-        )
-
-        assert swarm.name == "TestSwarm"
-        assert len(swarm.agents) == 3
-        assert swarm.max_loops == 1
-        assert swarm.verbose is False
-        assert swarm.decision_threshold == 0.6
-
-    def test_configured_initialization(
-        self, sample_agents, sample_board_members
-    ):
-        """Test configured swarm initialization."""
-        swarm = BoardOfDirectorsSwarm(
-            name="ConfiguredSwarm",
-            description="Test description",
-            board_members=sample_board_members,
-            agents=sample_agents,
-            max_loops=3,
-            verbose=True,
-            decision_threshold=0.8,
-            enable_voting=False,
-            enable_consensus=False,
-            max_workers=8,
-        )
-
-        assert swarm.name == "ConfiguredSwarm"
-        assert swarm.description == "Test description"
-        assert len(swarm.board_members) == 3
-        assert len(swarm.agents) == 3
-        assert swarm.max_loops == 3
-        assert swarm.verbose is True
-        assert swarm.decision_threshold == 0.8
-        assert swarm.enable_voting is False
-        assert swarm.enable_consensus is False
-        assert swarm.max_workers == 8
-
-    def test_default_board_setup(self, sample_agents):
-        """Test default board setup when no board members provided."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-
-        assert len(swarm.board_members) == 3
-        assert swarm.board_members[0].role == BoardMemberRole.CHAIRMAN
-        assert (
-            swarm.board_members[1].role
-            == BoardMemberRole.VICE_CHAIRMAN
-        )
-        assert (
-            swarm.board_members[2].role == BoardMemberRole.SECRETARY
-        )
-
-    def test_initialization_without_agents(self):
-        """Test initialization without agents should raise error."""
-        with pytest.raises(
-            ValueError, match="No agents found in the swarm"
-        ):
-            BoardOfDirectorsSwarm(agents=[])
-
-    def test_initialization_with_invalid_max_loops(
-        self, sample_agents
-    ):
-        """Test initialization with invalid max_loops."""
-        with pytest.raises(
-            ValueError, match="Max loops must be greater than 0"
-        ):
-            BoardOfDirectorsSwarm(agents=sample_agents, max_loops=0)
-
-    def test_initialization_with_invalid_decision_threshold(
-        self, sample_agents
-    ):
-        """Test initialization with invalid decision threshold."""
-        with pytest.raises(
-            ValueError,
-            match="Decision threshold must be between 0.0 and 1.0",
-        ):
-            BoardOfDirectorsSwarm(
-                agents=sample_agents, decision_threshold=1.5
-            )
-
-
-class TestBoardOfDirectorsSwarmMethods:
-    """Test BoardOfDirectorsSwarm methods."""
-
-    def test_setup_default_board(self, sample_agents):
-        """Test default board setup."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-
-        assert len(swarm.board_members) == 3
-        assert all(
-            hasattr(member.agent, "agent_name")
-            for member in swarm.board_members
-        )
-        assert all(
-            hasattr(member.agent, "run")
-            for member in swarm.board_members
-        )
-
-    def test_get_chairman_prompt(self, sample_agents):
-        """Test chairman prompt generation."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-        prompt = swarm._get_chairman_prompt()
-
-        assert "Chairman" in prompt
-        assert "board meetings" in prompt
-        assert "consensus" in prompt
-
-    def test_get_vice_chairman_prompt(self, sample_agents):
-        """Test vice chairman prompt generation."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-        prompt = swarm._get_vice_chairman_prompt()
-
-        assert "Vice Chairman" in prompt
-        assert "supporting" in prompt
-        assert "operational" in prompt
-
-    def test_get_secretary_prompt(self, sample_agents):
-        """Test secretary prompt generation."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-        prompt = swarm._get_secretary_prompt()
-
-        assert "Secretary" in prompt
-        assert "documenting" in prompt
-        assert "records" in prompt
-
-    def test_format_board_members_info(self, configured_board_swarm):
-        """Test board members info formatting."""
-        info = configured_board_swarm._format_board_members_info()
-
-        assert "Chairman" in info
-        assert "Vice-Chairman" in info
-        assert "Secretary" in info
-        assert "expertise" in info
-
-    def test_add_board_member(
-        self, basic_board_swarm, mock_board_member
-    ):
-        """Test adding a board member."""
-        initial_count = len(basic_board_swarm.board_members)
-        basic_board_swarm.add_board_member(mock_board_member)
-
-        assert (
-            len(basic_board_swarm.board_members) == initial_count + 1
-        )
-        assert mock_board_member in basic_board_swarm.board_members
-
-    def test_remove_board_member(self, configured_board_swarm):
-        """Test removing a board member."""
-        member_to_remove = configured_board_swarm.board_members[0]
-        member_name = member_to_remove.agent.agent_name
-
-        initial_count = len(configured_board_swarm.board_members)
-        configured_board_swarm.remove_board_member(member_name)
-
-        assert (
-            len(configured_board_swarm.board_members)
-            == initial_count - 1
-        )
-        assert (
-            member_to_remove
-            not in configured_board_swarm.board_members
-        )
-
-    def test_get_board_member(self, configured_board_swarm):
-        """Test getting a board member by name."""
-        member = configured_board_swarm.board_members[0]
-        member_name = member.agent.agent_name
-
-        found_member = configured_board_swarm.get_board_member(
-            member_name
-        )
-        assert found_member == member
-
-        # Test with non-existent member
-        not_found = configured_board_swarm.get_board_member(
-            "NonExistent"
-        )
-        assert not_found is None
-
-    def test_get_board_summary(self, configured_board_swarm):
-        """Test getting board summary."""
-        summary = configured_board_swarm.get_board_summary()
-
-        assert "board_name" in summary
-        assert "total_members" in summary
-        assert "total_agents" in summary
-        assert "max_loops" in summary
-        assert "decision_threshold" in summary
-        assert "members" in summary
-
-        assert summary["board_name"] == "ConfiguredBoard"
-        assert summary["total_members"] == 3
-        assert summary["total_agents"] == 3
-
-
-class TestBoardMeetingOperations:
-    """Test board meeting operations."""
-
-    def test_create_board_meeting_prompt(
-        self, configured_board_swarm
-    ):
-        """Test board meeting prompt creation."""
-        task = "Test task for board meeting"
-        prompt = configured_board_swarm._create_board_meeting_prompt(
-            task
-        )
-
-        assert task in prompt
-        assert "BOARD OF DIRECTORS MEETING" in prompt
-        assert "INSTRUCTIONS" in prompt
-        assert "plan" in prompt
-        assert "orders" in prompt
-
-    def test_conduct_board_discussion(self, configured_board_swarm):
-        """Test board discussion conduction."""
-        prompt = "Test board meeting prompt"
-
-        with patch.object(
-            configured_board_swarm.board_members[0].agent, "run"
-        ) as mock_run:
-            mock_run.return_value = "Board discussion result"
-            result = configured_board_swarm._conduct_board_discussion(
-                prompt
-            )
-
-            assert result == "Board discussion result"
-            mock_run.assert_called_once_with(task=prompt, img=None)
-
-    def test_conduct_board_discussion_no_chairman(
-        self, sample_agents
-    ):
-        """Test board discussion when no chairman is found."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-        # Remove all board members
-        swarm.board_members = []
-
-        with pytest.raises(
-            ValueError, match="No chairman found in board members"
-        ):
-            swarm._conduct_board_discussion("Test prompt")
-
-    def test_parse_board_decisions_valid_json(
-        self, configured_board_swarm
-    ):
-        """Test parsing valid JSON board decisions."""
-        valid_json = """
-        {
-            "plan": "Test plan",
-            "orders": [
-                {
-                    "agent_name": "Agent1",
-                    "task": "Task 1",
-                    "priority": 1,
-                    "assigned_by": "Chairman"
-                }
-            ],
-            "decisions": [
-                {
-                    "decision_type": "majority",
-                    "decision": "Test decision",
-                    "votes_for": 2,
-                    "votes_against": 1,
-                    "abstentions": 0,
-                    "reasoning": "Test reasoning"
-                }
-            ],
-            "meeting_summary": "Test summary"
-        }
-        """
-
-        result = configured_board_swarm._parse_board_decisions(
-            valid_json
-        )
-
-        assert isinstance(result, BoardSpec)
-        assert result.plan == "Test plan"
-        assert len(result.orders) == 1
-        assert len(result.decisions) == 1
-        assert result.meeting_summary == "Test summary"
-
-    def test_parse_board_decisions_invalid_json(
-        self, configured_board_swarm
-    ):
-        """Test parsing invalid JSON board decisions."""
-        invalid_json = "Invalid JSON content"
-
-        result = configured_board_swarm._parse_board_decisions(
-            invalid_json
-        )
-
-        assert isinstance(result, BoardSpec)
-        assert result.plan == invalid_json
-        assert len(result.orders) == 0
-        assert len(result.decisions) == 0
-        assert (
-            result.meeting_summary
-            == "Parsing failed, using raw output"
-        )
-
-    def test_run_board_meeting(self, configured_board_swarm):
-        """Test running a complete board meeting."""
-        task = "Test board meeting task"
-
-        with patch.object(
-            configured_board_swarm, "_conduct_board_discussion"
-        ) as mock_discuss:
-            with patch.object(
-                configured_board_swarm, "_parse_board_decisions"
-            ) as mock_parse:
-                mock_discuss.return_value = "Board discussion"
-                mock_parse.return_value = BoardSpec(
-                    plan="Test plan",
-                    orders=[],
-                    decisions=[],
-                    meeting_summary="Test summary",
-                )
-
-                result = configured_board_swarm.run_board_meeting(
-                    task
-                )
-
-                assert isinstance(result, BoardSpec)
-                mock_discuss.assert_called_once()
-                mock_parse.assert_called_once_with("Board discussion")
-
-
-class TestTaskExecution:
-    """Test task execution methods."""
-
-    def test_call_single_agent(self, configured_board_swarm):
-        """Test calling a single agent."""
-        agent_name = "Agent1"
-        task = "Test task"
-
-        with patch.object(
-            configured_board_swarm.agents[0], "run"
-        ) as mock_run:
-            mock_run.return_value = "Agent response"
-            result = configured_board_swarm._call_single_agent(
-                agent_name, task
-            )
-
-            assert result == "Agent response"
-            mock_run.assert_called_once()
-
-    def test_call_single_agent_not_found(
-        self, configured_board_swarm
-    ):
-        """Test calling a non-existent agent."""
-        with pytest.raises(
-            ValueError, match="Agent 'NonExistent' not found"
-        ):
-            configured_board_swarm._call_single_agent(
-                "NonExistent", "Test task"
-            )
-
-    def test_execute_single_order(self, configured_board_swarm):
-        """Test executing a single order."""
-        order = BoardOrder(
-            agent_name="Agent1",
-            task="Test order task",
-            priority=1,
-            assigned_by="Chairman",
-        )
-
-        with patch.object(
-            configured_board_swarm, "_call_single_agent"
-        ) as mock_call:
-            mock_call.return_value = "Order execution result"
-            result = configured_board_swarm._execute_single_order(
-                order
-            )
-
-            assert result == "Order execution result"
-            mock_call.assert_called_once_with(
-                agent_name="Agent1", task="Test order task"
-            )
-
-    def test_execute_orders(self, configured_board_swarm):
-        """Test executing multiple orders."""
-        orders = [
-            BoardOrder(
-                agent_name="Agent1", task="Task 1", priority=1
-            ),
-            BoardOrder(
-                agent_name="Agent2", task="Task 2", priority=2
-            ),
-        ]
-
-        with patch.object(
-            configured_board_swarm, "_execute_single_order"
-        ) as mock_execute:
-            mock_execute.side_effect = ["Result 1", "Result 2"]
-            results = configured_board_swarm._execute_orders(orders)
-
-            assert len(results) == 2
-            assert results[0]["agent_name"] == "Agent1"
-            assert results[0]["output"] == "Result 1"
-            assert results[1]["agent_name"] == "Agent2"
-            assert results[1]["output"] == "Result 2"
-
-    def test_generate_board_feedback(self, configured_board_swarm):
-        """Test generating board feedback."""
-        outputs = [
-            {"agent_name": "Agent1", "output": "Output 1"},
-            {"agent_name": "Agent2", "output": "Output 2"},
-        ]
-
-        with patch.object(
-            configured_board_swarm.board_members[0].agent, "run"
-        ) as mock_run:
-            mock_run.return_value = "Board feedback"
-            result = configured_board_swarm._generate_board_feedback(
-                outputs
-            )
-
-            assert result == "Board feedback"
-            mock_run.assert_called_once()
-
-    def test_generate_board_feedback_no_chairman(self, sample_agents):
-        """Test generating feedback when no chairman is found."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-        swarm.board_members = []  # Remove all board members
-
-        with pytest.raises(
-            ValueError, match="No chairman found for feedback"
-        ):
-            swarm._generate_board_feedback([])
-
-
-class TestStepAndRunMethods:
-    """Test step and run methods."""
-
-    def test_step_method(self, configured_board_swarm):
-        """Test the step method."""
-        task = "Test step task"
-
-        with patch.object(
-            configured_board_swarm, "run_board_meeting"
-        ) as mock_meeting:
-            with patch.object(
-                configured_board_swarm, "_execute_orders"
-            ) as mock_execute:
-                with patch.object(
-                    configured_board_swarm, "_generate_board_feedback"
-                ) as mock_feedback:
-                    mock_meeting.return_value = BoardSpec(
-                        plan="Test plan",
-                        orders=[
-                            BoardOrder(
-                                agent_name="Agent1", task="Task 1"
-                            )
-                        ],
-                        decisions=[],
-                        meeting_summary="Test summary",
-                    )
-                    mock_execute.return_value = [
-                        {"agent_name": "Agent1", "output": "Result"}
-                    ]
-                    mock_feedback.return_value = "Board feedback"
-
-                    result = configured_board_swarm.step(task)
-
-                    assert result == "Board feedback"
-                    mock_meeting.assert_called_once_with(
-                        task=task, img=None
-                    )
-                    mock_execute.assert_called_once()
-                    mock_feedback.assert_called_once()
-
-    def test_step_method_no_feedback(self, configured_board_swarm):
-        """Test the step method with feedback disabled."""
-        configured_board_swarm.board_feedback_on = False
-        task = "Test step task"
-
-        with patch.object(
-            configured_board_swarm, "run_board_meeting"
-        ) as mock_meeting:
-            with patch.object(
-                configured_board_swarm, "_execute_orders"
-            ) as mock_execute:
-                mock_meeting.return_value = BoardSpec(
-                    plan="Test plan",
-                    orders=[
-                        BoardOrder(agent_name="Agent1", task="Task 1")
-                    ],
-                    decisions=[],
-                    meeting_summary="Test summary",
-                )
-                mock_execute.return_value = [
-                    {"agent_name": "Agent1", "output": "Result"}
-                ]
-
-                result = configured_board_swarm.step(task)
-
-                assert result == [
-                    {"agent_name": "Agent1", "output": "Result"}
-                ]
-
-    def test_run_method(self, configured_board_swarm):
-        """Test the run method."""
-        task = "Test run task"
-
-        with patch.object(
-            configured_board_swarm, "step"
-        ) as mock_step:
-            with patch.object(
-                configured_board_swarm, "conversation"
-            ) as mock_conversation:
-                mock_step.return_value = "Step result"
-                mock_conversation.add = Mock()
-
-                configured_board_swarm.run(task)
-
-                assert mock_step.call_count == 2  # max_loops = 2
-                assert mock_conversation.add.call_count == 2
-
-    def test_arun_method(self, configured_board_swarm):
-        """Test the async run method."""
-        task = "Test async run task"
-
-        with patch.object(configured_board_swarm, "run") as mock_run:
-            mock_run.return_value = "Async result"
-
-            async def test_async():
-                result = await configured_board_swarm.arun(task)
-                return result
-
-            result = asyncio.run(test_async())
-            assert result == "Async result"
-            mock_run.assert_called_once_with(task=task, img=None)
-
-
-# Integration tests
-class TestBoardOfDirectorsSwarmIntegration:
-    """Integration tests for BoardOfDirectorsSwarm."""
-
-    def test_full_workflow_integration(self, sample_agents):
-        """Test full workflow integration."""
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, verbose=False, max_loops=1
-        )
-
-        task = "Create a simple report"
-
-        # Mock the board discussion to return structured output
-        mock_board_output = """
-        {
-            "plan": "Create a comprehensive report",
-            "orders": [
-                {
-                    "agent_name": "Agent1",
-                    "task": "Research the topic",
-                    "priority": 1,
-                    "assigned_by": "Chairman"
-                },
-                {
-                    "agent_name": "Agent2",
-                    "task": "Write the report",
-                    "priority": 2,
-                    "assigned_by": "Chairman"
-                }
-            ],
-            "decisions": [
-                {
-                    "decision_type": "consensus",
-                    "decision": "Proceed with report creation",
-                    "votes_for": 3,
-                    "votes_against": 0,
-                    "abstentions": 0,
-                    "reasoning": "Report is needed for decision making"
-                }
-            ],
-            "meeting_summary": "Board agreed to create a comprehensive report"
-        }
-        """
-
-        with patch.object(
-            swarm.board_members[0].agent, "run"
-        ) as mock_run:
-            mock_run.return_value = mock_board_output
-            result = swarm.run(task)
-
-            assert result is not None
-            assert isinstance(result, dict)
-
-    def test_board_member_management_integration(self, sample_agents):
-        """Test board member management integration."""
-        swarm = BoardOfDirectorsSwarm(agents=sample_agents)
-
-        # Test adding a new board member
-        new_member = BoardMember(
-            agent=sample_agents[0],
-            role=BoardMemberRole.MEMBER,
-            voting_weight=1.0,
-            expertise_areas=["testing"],
-        )
-
-        initial_count = len(swarm.board_members)
-        swarm.add_board_member(new_member)
-        assert len(swarm.board_members) == initial_count + 1
-
-        # Test removing a board member
-        member_name = swarm.board_members[0].agent.agent_name
-        swarm.remove_board_member(member_name)
-        assert len(swarm.board_members) == initial_count
-
-        # Test getting board member
-        member = swarm.get_board_member(
-            swarm.board_members[0].agent.agent_name
-        )
-        assert member is not None
-
-
-# Parameterized tests
-@pytest.mark.parametrize("max_loops", [1, 2, 3])
-def test_max_loops_parameterization(sample_agents, max_loops):
-    """Test swarm with different max_loops values."""
-    swarm = BoardOfDirectorsSwarm(
-        agents=sample_agents, max_loops=max_loops
-    )
-    assert swarm.max_loops == max_loops
-
-
-@pytest.mark.parametrize(
-    "decision_threshold", [0.5, 0.6, 0.7, 0.8, 0.9]
-)
-def test_decision_threshold_parameterization(
-    sample_agents, decision_threshold
-):
-    """Test swarm with different decision threshold values."""
-    swarm = BoardOfDirectorsSwarm(
-        agents=sample_agents, decision_threshold=decision_threshold
-    )
-    assert swarm.decision_threshold == decision_threshold
-
-
-@pytest.mark.parametrize(
-    "board_model", ["gpt-4o-mini", "gpt-4", "claude-3-sonnet"]
-)
-def test_board_model_parameterization(sample_agents, board_model):
-    """Test swarm with different board models."""
-    swarm = BoardOfDirectorsSwarm(
-        agents=sample_agents, board_model_name=board_model
-    )
-    assert swarm.board_model_name == board_model
-
-
-# Error handling tests
-class TestBoardOfDirectorsSwarmErrorHandling:
-    """Test error handling in BoardOfDirectorsSwarm."""
-
-    def test_initialization_error_handling(self):
-        """Test error handling during initialization."""
-        with pytest.raises(ValueError):
-            BoardOfDirectorsSwarm(agents=[])
-
-    def test_board_meeting_error_handling(
-        self, configured_board_swarm
-    ):
-        """Test error handling during board meeting."""
-        with patch.object(
-            configured_board_swarm, "_conduct_board_discussion"
-        ) as mock_discuss:
-            mock_discuss.side_effect = Exception(
-                "Board meeting failed"
-            )
-
-            with pytest.raises(
-                Exception, match="Board meeting failed"
-            ):
-                configured_board_swarm.run_board_meeting("Test task")
-
-    def test_task_execution_error_handling(
-        self, configured_board_swarm
-    ):
-        """Test error handling during task execution."""
-        with patch.object(
-            configured_board_swarm, "_call_single_agent"
-        ) as mock_call:
-            mock_call.side_effect = Exception("Task execution failed")
-
-            with pytest.raises(
-                Exception, match="Task execution failed"
-            ):
-                configured_board_swarm._call_single_agent(
-                    "Agent1", "Test task"
-                )
-
-    def test_order_execution_error_handling(
-        self, configured_board_swarm
-    ):
-        """Test error handling during order execution."""
-        orders = [BoardOrder(agent_name="Agent1", task="Task 1")]
-
-        with patch.object(
-            configured_board_swarm, "_execute_single_order"
-        ) as mock_execute:
-            mock_execute.side_effect = Exception(
-                "Order execution failed"
-            )
-
-            # Should not raise exception, but log error
-            results = configured_board_swarm._execute_orders(orders)
-            assert len(results) == 1
-            assert "Error" in results[0]["output"]
-
-
-# Performance tests
-class TestBoardOfDirectorsSwarmPerformance:
-    """Test performance characteristics of BoardOfDirectorsSwarm."""
-
-    def test_parallel_execution_performance(self, sample_agents):
-        """Test parallel execution performance."""
-        import time
-
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, max_workers=3, verbose=False
-        )
-
-        # Create multiple orders
-        orders = [
-            BoardOrder(agent_name=f"Agent{i+1}", task=f"Task {i+1}")
-            for i in range(3)
-        ]
-
-        start_time = time.time()
-
-        with patch.object(
-            swarm, "_execute_single_order"
-        ) as mock_execute:
-            mock_execute.side_effect = (
-                lambda order: f"Result for {order.task}"
-            )
-            results = swarm._execute_orders(orders)
-
-        end_time = time.time()
-        execution_time = end_time - start_time
-
-        assert len(results) == 3
-        assert (
-            execution_time < 1.0
-        )  # Should complete quickly with parallel execution
-
-    def test_memory_usage(self, sample_agents):
-        """Test memory usage characteristics."""
-        import psutil
-        import os
-
-        process = psutil.Process(os.getpid())
-        initial_memory = process.memory_info().rss
-
-        # Create multiple swarms
-        swarms = []
-        for i in range(5):
-            swarm = BoardOfDirectorsSwarm(
-                agents=sample_agents, name=f"Swarm{i}", verbose=False
-            )
-            swarms.append(swarm)
-
-        final_memory = process.memory_info().rss
-        memory_increase = final_memory - initial_memory
-
-        # Memory increase should be reasonable (less than 100MB)
-        assert memory_increase < 100 * 1024 * 1024
-
-
-# Configuration tests
-class TestBoardOfDirectorsSwarmConfiguration:
-    """Test configuration options for BoardOfDirectorsSwarm."""
-
-    def test_verbose_configuration(self, sample_agents):
-        """Test verbose configuration."""
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, verbose=True
-        )
-        assert swarm.verbose is True
-
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, verbose=False
-        )
-        assert swarm.verbose is False
-
-    def test_collaboration_prompt_configuration(self, sample_agents):
-        """Test collaboration prompt configuration."""
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, add_collaboration_prompt=True
-        )
-        assert swarm.add_collaboration_prompt is True
-
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, add_collaboration_prompt=False
-        )
-        assert swarm.add_collaboration_prompt is False
-
-    def test_board_feedback_configuration(self, sample_agents):
-        """Test board feedback configuration."""
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, board_feedback_on=True
-        )
-        assert swarm.board_feedback_on is True
-
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, board_feedback_on=False
-        )
-        assert swarm.board_feedback_on is False
-
-    def test_voting_configuration(self, sample_agents):
-        """Test voting configuration."""
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, enable_voting=True
-        )
-        assert swarm.enable_voting is True
-
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, enable_voting=False
-        )
-        assert swarm.enable_voting is False
-
-    def test_consensus_configuration(self, sample_agents):
-        """Test consensus configuration."""
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, enable_consensus=True
-        )
-        assert swarm.enable_consensus is True
-
-        swarm = BoardOfDirectorsSwarm(
-            agents=sample_agents, enable_consensus=False
-        )
-        assert swarm.enable_consensus is False
-
-
-# Real integration tests (skipped if no API key)
-@pytest.mark.skipif(
-    not os.getenv("OPENAI_API_KEY"),
-    reason="OpenAI API key not available",
-)
-class TestBoardOfDirectorsSwarmRealIntegration:
-    """Real integration tests for BoardOfDirectorsSwarm."""
-
-    def test_real_board_meeting(self):
-        """Test real board meeting with actual API calls."""
-        # Create real agents
-        agents = [
-            Agent(
-                agent_name="Researcher",
-                agent_description="Research analyst",
-                model_name="gpt-4o-mini",
-                max_loops=1,
-            ),
-            Agent(
-                agent_name="Writer",
-                agent_description="Content writer",
-                model_name="gpt-4o-mini",
-                max_loops=1,
-            ),
-        ]
-
-        swarm = BoardOfDirectorsSwarm(
-            agents=agents, verbose=False, max_loops=1
-        )
-
-        task = "Create a brief market analysis report"
-
-        result = swarm.run(task)
-
-        assert result is not None
-        assert isinstance(result, dict)
-        assert "conversation_history" in result
-
-    def test_real_board_member_management(self):
-        """Test real board member management."""
-        agents = [
-            Agent(
-                agent_name="TestAgent",
-                agent_description="Test agent",
-                model_name="gpt-4o-mini",
-                max_loops=1,
-            )
-        ]
-
-        swarm = BoardOfDirectorsSwarm(agents=agents, verbose=False)
-
-        # Test board summary
-        summary = swarm.get_board_summary()
-        assert summary["total_members"] == 3  # Default board
-        assert summary["total_agents"] == 1
-
-
-# Test runner
-if __name__ == "__main__":
-    pytest.main([__file__, "-v", "--tb=short"])
+"""
+Comprehensive test suite for Board of Directors Swarm.
+
+This module contains extensive tests for the Board of Directors swarm implementation,
+covering all aspects including initialization, board operations, task execution,
+error handling, and performance characteristics.
+
+Tests follow the example.py pattern with real agents and multiple agent scenarios.
+"""
+
+import pytest
+from swarms.structs.board_of_directors_swarm import (
+    BoardOfDirectorsSwarm,
+    BoardMember,
+    BoardMemberRole,
+    BoardDecisionType,
+    BoardOrder,
+    BoardDecision,
+    BoardSpec,
+)
+from swarms.structs.agent import Agent
+
+
+@pytest.fixture
+def sample_agents():
+    """Create sample real agents for testing."""
+    agents = []
+    for i in range(5):
+        agent = Agent(
+            agent_name=f"Board-Member-{i+1}",
+            agent_description=f"Board member {i+1} with expertise in strategic decision making",
+            model_name="gpt-4o",
+            max_loops=1,
+        )
+        agents.append(agent)
+    return agents
+
+
+@pytest.fixture
+def basic_board_swarm(sample_agents):
+    """Create a basic Board of Directors swarm for testing."""
+    return BoardOfDirectorsSwarm(
+        name="Test-Board-Swarm",
+        description="Test board of directors swarm for comprehensive testing",
+        agents=sample_agents,
+        max_loops=1,
+        verbose=True,
+    )
+
+
+def test_board_of_directors_swarm_basic_initialization(basic_board_swarm):
+    """Test basic BoardOfDirectorsSwarm initialization with multiple agents"""
+    # Verify initialization
+    assert basic_board_swarm.name == "Test-Board-Swarm"
+    assert basic_board_swarm.description == "Test board of directors swarm for comprehensive testing"
+    assert len(basic_board_swarm.agents) == 5
+    assert basic_board_swarm.max_loops == 1
+    assert basic_board_swarm.verbose is True
+    assert basic_board_swarm.board_model_name == "gpt-4o-mini"
+    assert basic_board_swarm.decision_threshold == 0.6
+    assert basic_board_swarm.enable_voting is True
+    assert basic_board_swarm.enable_consensus is True
+
+
+def test_board_of_directors_swarm_execution(basic_board_swarm):
+    """Test BoardOfDirectorsSwarm execution with multiple board members"""
+    # Test execution
+    result = basic_board_swarm.run(
+        "Develop a strategic plan for entering the renewable energy market. "
+        "Consider market opportunities, competitive landscape, technical requirements, "
+        "and regulatory compliance."
+    )
+
+    assert result is not None
+
+
+def test_board_of_directors_swarm_with_custom_configuration():
+    """Test BoardOfDirectorsSwarm with custom configuration"""
+    # Create specialized agents for different board roles
+    ceo = Agent(
+        agent_name="CEO",
+        agent_description="Chief Executive Officer with overall strategic vision",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    cfo = Agent(
+        agent_name="CFO",
+        agent_description="Chief Financial Officer with financial expertise",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    cto = Agent(
+        agent_name="CTO",
+        agent_description="Chief Technology Officer with technical expertise",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    cmo = Agent(
+        agent_name="CMO",
+        agent_description="Chief Marketing Officer with market expertise",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    legal_counsel = Agent(
+        agent_name="Legal-Counsel",
+        agent_description="Chief Legal Officer with regulatory expertise",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create board swarm with custom configuration
+    board_swarm = BoardOfDirectorsSwarm(
+        name="Executive-Board-Swarm",
+        description="Executive board for strategic enterprise decisions",
+        agents=[ceo, cfo, cto, cmo, legal_counsel],
+        max_loops=2,
+        decision_threshold=0.7,
+        enable_voting=True,
+        enable_consensus=True,
+        verbose=True,
+    )
+
+    # Test execution with complex scenario
+    result = board_swarm.run(
+        "Evaluate the acquisition of a competitor in the AI space. "
+        "Consider financial implications, technical integration challenges, "
+        "market positioning, legal considerations, and overall strategic fit."
+    )
+
+    assert result is not None
+
+
+def test_board_of_directors_swarm_error_handling():
+    """Test BoardOfDirectorsSwarm error handling and validation"""
+    # Test with empty agents list
+    try:
+        board_swarm = BoardOfDirectorsSwarm(agents=[])
+        assert False, "Should have raised ValueError for empty agents list"
+    except ValueError as e:
+        assert "agents" in str(e).lower() or "empty" in str(e).lower()
+
+    # Test with invalid max_loops
+    analyst = Agent(
+        agent_name="Test-Analyst",
+        agent_description="Test analyst",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    try:
+        board_swarm = BoardOfDirectorsSwarm(agents=[analyst], max_loops=0)
+        assert False, "Should have raised ValueError for invalid max_loops"
+    except ValueError as e:
+        assert "max_loops" in str(e).lower() or "0" in str(e)
+
+
+def test_board_of_directors_swarm_real_world_scenario():
+    """Test BoardOfDirectorsSwarm in a realistic business scenario"""
+    # Create agents representing different C-suite executives
+    chief_strategy_officer = Agent(
+        agent_name="Chief-Strategy-Officer",
+        agent_description="Chief Strategy Officer with expertise in corporate strategy and market analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    chief_technology_officer = Agent(
+        agent_name="Chief-Technology-Officer",
+        agent_description="Chief Technology Officer with deep technical expertise and innovation focus",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    chief_financial_officer = Agent(
+        agent_name="Chief-Financial-Officer",
+        agent_description="Chief Financial Officer with expertise in financial planning and risk management",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    chief_operating_officer = Agent(
+        agent_name="Chief-Operating-Officer",
+        agent_description="Chief Operating Officer with expertise in operations and implementation",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    chief_risk_officer = Agent(
+        agent_name="Chief-Risk-Officer",
+        agent_description="Chief Risk Officer with expertise in risk assessment and compliance",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create comprehensive executive board
+    executive_board = BoardOfDirectorsSwarm(
+        name="Executive-Board-of-Directors",
+        description="Executive board for high-level strategic decision making",
+        agents=[chief_strategy_officer, chief_technology_officer, chief_financial_officer,
+                chief_operating_officer, chief_risk_officer],
+        max_loops=3,
+        decision_threshold=0.8,  # Require strong consensus
+        enable_voting=True,
+        enable_consensus=True,
+        verbose=True,
+    )
+
+    # Test with complex enterprise scenario
+    result = executive_board.run(
+        "Develop a comprehensive 5-year strategic plan for transforming our company into a "
+        "leader in AI-powered enterprise solutions. Consider market opportunities, competitive "
+        "landscape, technological requirements, financial implications, operational capabilities, "
+        "and risk management strategies."
+    )
+
+    assert result is not None
diff --git a/tests/structs/test_concurrent_workflow.py b/tests/structs/test_concurrent_workflow.py
index 9cad973e..95b01726 100644
--- a/tests/structs/test_concurrent_workflow.py
+++ b/tests/structs/test_concurrent_workflow.py
@@ -2,129 +2,328 @@ from swarms import Agent
 from swarms.structs.concurrent_workflow import ConcurrentWorkflow
 
 
-def test_basic_workflow():
-    """Test basic workflow initialization and execution"""
-    # Create test agents
-    agent1 = Agent(
-        agent_name="Test-Agent-1",
-        system_prompt="You are a test agent 1",
-        model_name="claude-3-sonnet-20240229",
+def test_concurrent_workflow_basic_execution():
+    """Test basic ConcurrentWorkflow execution with multiple agents"""
+    # Create specialized agents for different perspectives
+    research_agent = Agent(
+        agent_name="Research-Analyst",
+        agent_description="Agent specializing in research and data collection",
+        model_name="gpt-4o",
         max_loops=1,
     )
 
-    agent2 = Agent(
-        agent_name="Test-Agent-2",
-        system_prompt="You are a test agent 2",
-        model_name="claude-3-sonnet-20240229",
+    strategy_agent = Agent(
+        agent_name="Strategy-Consultant",
+        agent_description="Agent specializing in strategic planning and analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    risk_agent = Agent(
+        agent_name="Risk-Assessment-Specialist",
+        agent_description="Agent specializing in risk analysis and mitigation",
+        model_name="gpt-4o",
         max_loops=1,
     )
 
-    # Create workflow
+    # Create workflow with multiple agents
     workflow = ConcurrentWorkflow(
-        name="test-workflow", agents=[agent1, agent2], max_loops=1
+        name="Multi-Perspective-Analysis-Workflow",
+        description="Concurrent analysis from research, strategy, and risk perspectives",
+        agents=[research_agent, strategy_agent, risk_agent],
+        max_loops=1,
     )
 
     # Run workflow
-    result = workflow.run("Test task")
+    result = workflow.run("Analyze the potential impact of quantum computing on cybersecurity")
 
-    # Verify results
-    assert len(result) == 2
-    assert all(isinstance(r, dict) for r in result)
-    assert all("agent" in r and "output" in r for r in result)
+    # Verify results - ConcurrentWorkflow returns a list of dictionaries
+    assert result is not None
+    assert isinstance(result, list)
+    assert len(result) == 3
+    for r in result:
+        assert isinstance(r, dict)
+        assert "agent" in r
+        assert "output" in r
+        # Output might be None or empty string, just check it exists
+
+
+def test_concurrent_workflow_with_dashboard():
+    """Test ConcurrentWorkflow with dashboard visualization"""
+    # Create agents with different expertise
+    market_agent = Agent(
+        agent_name="Market-Analyst",
+        agent_description="Agent for market analysis and trends",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    financial_agent = Agent(
+        agent_name="Financial-Expert",
+        agent_description="Agent for financial analysis and forecasting",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-def test_dashboard_workflow():
-    """Test workflow with dashboard enabled"""
-    agent = Agent(
-        agent_name="Dashboard-Test-Agent",
-        system_prompt="You are a test agent",
-        model_name="claude-3-sonnet-20240229",
+    technology_agent = Agent(
+        agent_name="Technology-Specialist",
+        agent_description="Agent for technology assessment and innovation",
+        model_name="gpt-4o",
         max_loops=1,
     )
 
     workflow = ConcurrentWorkflow(
-        name="dashboard-test",
-        agents=[agent],
+        name="Dashboard-Analysis-Workflow",
+        description="Concurrent analysis with real-time dashboard monitoring",
+        agents=[market_agent, financial_agent, technology_agent],
         max_loops=1,
         show_dashboard=True,
     )
 
-    result = workflow.run("Test task")
+    result = workflow.run("Evaluate investment opportunities in renewable energy sector")
 
-    assert len(result) == 1
-    assert isinstance(result[0], dict)
-    assert "agent" in result[0]
-    assert "output" in result[0]
+    assert result is not None
+    assert isinstance(result, list)
+    assert len(result) == 3
+    for r in result:
+        assert isinstance(r, dict)
+        assert "agent" in r
+        assert "output" in r
+        # Output can be None or empty, just check structure
 
 
-def test_multiple_agents():
-    """Test workflow with multiple agents"""
+def test_concurrent_workflow_batched_execution():
+    """Test batched execution of multiple tasks"""
+    # Create agents for comprehensive analysis
     agents = [
         Agent(
-            agent_name=f"Agent-{i}",
-            system_prompt=f"You are test agent {i}",
-            model_name="claude-3-sonnet-20240229",
+            agent_name=f"Analysis-Agent-{i+1}",
+            agent_description=f"Agent {i+1} for comprehensive business analysis",
+            model_name="gpt-4o",
             max_loops=1,
         )
-        for i in range(3)
+        for i in range(4)
     ]
 
     workflow = ConcurrentWorkflow(
-        name="multi-agent-test", agents=agents, max_loops=1
+        name="Batched-Analysis-Workflow",
+        description="Workflow for processing multiple analysis tasks",
+        agents=agents,
+        max_loops=1,
     )
 
-    result = workflow.run("Multi-agent test task")
+    # Test batched execution
+    tasks = [
+        "Analyze market trends in AI adoption",
+        "Evaluate competitive landscape in cloud computing",
+        "Assess regulatory impacts on fintech",
+        "Review supply chain vulnerabilities in manufacturing"
+    ]
 
-    assert len(result) == 3
-    assert all(isinstance(r, dict) for r in result)
-    assert all("agent" in r and "output" in r for r in result)
+    results = workflow.batch_run(tasks)
 
+    assert results is not None
+    assert isinstance(results, list)
+    assert len(results) == 4
+    # Each result should be a list of agent outputs
+    for result in results:
+        assert result is not None
+        assert isinstance(result, list)
 
-def test_error_handling():
-    """Test workflow error handling"""
-    # Create an agent that will raise an exception
-    agent = Agent(
-        agent_name="Error-Agent",
-        system_prompt="You are a test agent that will raise an error",
-        model_name="invalid-model",  # This will cause an error
-        max_loops=1,
-    )
 
-    workflow = ConcurrentWorkflow(
-        name="error-test", agents=[agent], max_loops=1
-    )
+def test_concurrent_workflow_error_handling():
+    """Test ConcurrentWorkflow error handling and validation"""
+    # Test with empty agents list
+    try:
+        workflow = ConcurrentWorkflow(agents=[])
+        assert False, "Should have raised ValueError for empty agents list"
+    except ValueError as e:
+        assert "No agents provided" in str(e)
 
+    # Test with None agents
     try:
-        workflow.run("Test task")
-        assert False, "Expected an error but none was raised"
-    except Exception as e:
+        workflow = ConcurrentWorkflow(agents=None)
+        assert False, "Should have raised ValueError for None agents"
+    except ValueError as e:
+        assert "No agents provided" in str(e)
         assert str(e) != ""  # Verify we got an error message
 
 
-def test_max_loops():
-    """Test workflow respects max_loops setting"""
-    agent = Agent(
-        agent_name="Loop-Test-Agent",
-        system_prompt="You are a test agent",
-        model_name="claude-3-sonnet-20240229",
+def test_concurrent_workflow_max_loops_configuration():
+    """Test ConcurrentWorkflow max_loops configuration"""
+    agent1 = Agent(
+        agent_name="Loop-Test-Agent-1",
+        agent_description="First agent for loop testing",
+        model_name="gpt-4o",
         max_loops=2,
     )
 
+    agent2 = Agent(
+        agent_name="Loop-Test-Agent-2",
+        agent_description="Second agent for loop testing",
+        model_name="gpt-4o",
+        max_loops=3,
+    )
+
     workflow = ConcurrentWorkflow(
-        name="loop-test",
-        agents=[agent],
+        name="Loop-Configuration-Test",
+        description="Testing max_loops configuration",
+        agents=[agent1, agent2],
         max_loops=1,  # This should override agent's max_loops
     )
 
-    result = workflow.run("Test task")
+    result = workflow.run("Test workflow loop configuration")
+
+    assert result is not None
+    assert isinstance(result, list)
+    assert len(result) == 2
+    for r in result:
+        assert isinstance(r, dict)
+        assert "agent" in r
+        assert "output" in r
+
+
+def test_concurrent_workflow_different_output_types():
+    """Test ConcurrentWorkflow with different output types"""
+    # Create agents with diverse perspectives
+    technical_agent = Agent(
+        agent_name="Technical-Analyst",
+        agent_description="Agent for technical analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    business_agent = Agent(
+        agent_name="Business-Strategist",
+        agent_description="Agent for business strategy",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    legal_agent = Agent(
+        agent_name="Legal-Expert",
+        agent_description="Agent for legal compliance analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Test different output types
+    for output_type in ["dict", "dict-all-except-first"]:
+        workflow = ConcurrentWorkflow(
+            name=f"Output-Type-Test-{output_type}",
+            description=f"Testing output type: {output_type}",
+            agents=[technical_agent, business_agent, legal_agent],
+            max_loops=1,
+            output_type=output_type,
+        )
+
+        result = workflow.run("Evaluate AI implementation strategy")
+        assert result is not None
+        # The result structure depends on output_type, just ensure it's not None
+
+
+def test_concurrent_workflow_real_world_scenario():
+    """Test ConcurrentWorkflow in a realistic business scenario"""
+    # Create agents representing different departments
+    marketing_agent = Agent(
+        agent_name="Marketing-Director",
+        agent_description="Senior marketing director with 15 years experience",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    product_agent = Agent(
+        agent_name="Product-Manager",
+        agent_description="Product manager specializing in AI/ML products",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    engineering_agent = Agent(
+        agent_name="Lead-Engineer",
+        agent_description="Senior software engineer and technical architect",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    sales_agent = Agent(
+        agent_name="Sales-Executive",
+        agent_description="Enterprise sales executive with tech background",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    workflow = ConcurrentWorkflow(
+        name="Product-Launch-Review-Workflow",
+        description="Cross-functional team reviewing new AI product launch strategy",
+        agents=[marketing_agent, product_agent, engineering_agent, sales_agent],
+        max_loops=1,
+    )
 
-    assert len(result) == 1
-    assert isinstance(result[0], dict)
+    # Test with a realistic business scenario
+    result = workflow.run(
+        "Review and provide recommendations for our new AI-powered analytics platform launch. "
+        "Consider market positioning, technical feasibility, competitive landscape, and sales strategy."
+    )
+
+    assert result is not None
+    assert isinstance(result, list)
+    assert len(result) == 4
+    for r in result:
+        assert isinstance(r, dict)
+        assert "agent" in r
+        assert "output" in r
+        # Output content may vary, just check structure
+
+
+def test_concurrent_workflow_team_collaboration():
+    """Test ConcurrentWorkflow with team collaboration features"""
+    # Create agents that would naturally collaborate
+    data_scientist = Agent(
+        agent_name="Data-Scientist",
+        agent_description="ML engineer and data scientist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    ux_designer = Agent(
+        agent_name="UX-Designer",
+        agent_description="User experience designer and researcher",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    product_owner = Agent(
+        agent_name="Product-Owner",
+        agent_description="Product owner with business and technical background",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    qa_engineer = Agent(
+        agent_name="QA-Engineer",
+        agent_description="Quality assurance engineer and testing specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    workflow = ConcurrentWorkflow(
+        name="Cross-Functional-Development-Workflow",
+        description="Cross-functional team collaborating on feature development",
+        agents=[data_scientist, ux_designer, product_owner, qa_engineer],
+        max_loops=1,
+    )
+
+    result = workflow.run(
+        "Design and plan a new recommendation system for our e-commerce platform. "
+        "Each team member should provide their perspective on implementation, user experience, "
+        "business value, and quality assurance considerations."
+    )
 
-if __name__ == "__main__":
-    test_basic_workflow()
-    test_dashboard_workflow()
-    test_multiple_agents()
-    test_error_handling()
-    test_max_loops()
+    assert result is not None
+    assert isinstance(result, list)
+    assert len(result) == 4
+    for r in result:
+        assert isinstance(r, dict)
+        assert "agent" in r
+        assert "output" in r
diff --git a/tests/structs/test_graph_workflow_comprehensive.py b/tests/structs/test_graph_workflow_comprehensive.py
index 2a8fe248..871fd31e 100644
--- a/tests/structs/test_graph_workflow_comprehensive.py
+++ b/tests/structs/test_graph_workflow_comprehensive.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 """
 Comprehensive Testing Suite for GraphWorkflow
 
@@ -6,24 +5,13 @@ This module provides thorough testing of all GraphWorkflow functionality includi
 - Node and Edge creation and manipulation
 - Workflow construction and compilation
 - Execution with various parameters
-- Visualization and serialization
+- Multi-agent collaboration scenarios
 - Error handling and edge cases
-- Performance optimizations
 
-Usage:
-    python test_graph_workflow_comprehensive.py
+Tests follow the example.py pattern with real agents and multiple agent scenarios.
 """
 
-import json
-import time
-import tempfile
-import os
-import sys
-from unittest.mock import Mock
-
-# Add the swarms directory to the path
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), "swarms"))
-
+import pytest
 from swarms.structs.graph_workflow import (
     GraphWorkflow,
     Node,
@@ -31,1079 +19,203 @@ from swarms.structs.graph_workflow import (
     NodeType,
 )
 from swarms.structs.agent import Agent
-from swarms.prompts.multi_agent_collab_prompt import (
-    MULTI_AGENT_COLLAB_PROMPT_TWO,
-)
-
-
-class TestResults:
-    """Simple test results tracker"""
-
-    def __init__(self):
-        self.passed = 0
-        self.failed = 0
-        self.errors = []
 
-    def add_pass(self, test_name: str):
-        self.passed += 1
-        print(f"✅ PASS: {test_name}")
 
-    def add_fail(self, test_name: str, error: str):
-        self.failed += 1
-        self.errors.append(f"{test_name}: {error}")
-        print(f"❌ FAIL: {test_name} - {error}")
+def create_test_agent(name: str, description: str = None) -> Agent:
+    """Create a real agent for testing"""
+    if description is None:
+        description = f"Test agent for {name} operations"
 
-    def print_summary(self):
-        print("\n" + "=" * 60)
-        print("TEST SUMMARY")
-        print("=" * 60)
-        print(f"Passed: {self.passed}")
-        print(f"Failed: {self.failed}")
-        print(f"Total: {self.passed + self.failed}")
-
-        if self.errors:
-            print("\nErrors:")
-            for error in self.errors:
-                print(f"  - {error}")
-
-
-def create_mock_agent(name: str, model: str = "gpt-4") -> Agent:
-    """Create a mock agent for testing"""
-    agent = Agent(
+    return Agent(
         agent_name=name,
-        model_name=model,
+        agent_description=description,
+        model_name="gpt-4o",
         max_loops=1,
-        system_prompt=MULTI_AGENT_COLLAB_PROMPT_TWO,
     )
-    # Mock the run method to avoid actual API calls
-    agent.run = Mock(return_value=f"Mock output from {name}")
-    return agent
-
-
-def test_node_creation(results: TestResults):
-    """Test Node creation with various parameters"""
-    test_name = "Node Creation"
-
-    try:
-        # Test basic node creation
-        agent = create_mock_agent("TestAgent")
-        node = Node.from_agent(agent)
-        assert node.id == "TestAgent"
-        assert node.type == NodeType.AGENT
-        assert node.agent == agent
-        results.add_pass(f"{test_name} - Basic")
-
-        # Test node with custom id
-        node2 = Node(id="CustomID", type=NodeType.AGENT, agent=agent)
-        assert node2.id == "CustomID"
-        results.add_pass(f"{test_name} - Custom ID")
-
-        # Test node with metadata
-        metadata = {"priority": "high", "timeout": 30}
-        node3 = Node.from_agent(agent, metadata=metadata)
-        assert node3.metadata == metadata
-        results.add_pass(f"{test_name} - Metadata")
-
-        # Test error case - no id and no agent
-        try:
-            Node()
-            results.add_fail(
-                f"{test_name} - No ID validation",
-                "Should raise ValueError",
-            )
-        except ValueError:
-            results.add_pass(f"{test_name} - No ID validation")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_edge_creation(results: TestResults):
-    """Test Edge creation with various parameters"""
-    test_name = "Edge Creation"
-
-    try:
-        # Test basic edge creation
-        edge = Edge(source="A", target="B")
-        assert edge.source == "A"
-        assert edge.target == "B"
-        results.add_pass(f"{test_name} - Basic")
-
-        # Test edge with metadata
-        metadata = {"weight": 1.5, "type": "data"}
-        edge2 = Edge(source="A", target="B", metadata=metadata)
-        assert edge2.metadata == metadata
-        results.add_pass(f"{test_name} - Metadata")
-
-        # Test edge from nodes
-        node1 = Node(id="Node1", agent=create_mock_agent("Agent1"))
-        node2 = Node(id="Node2", agent=create_mock_agent("Agent2"))
-        edge3 = Edge.from_nodes(node1, node2)
-        assert edge3.source == "Node1"
-        assert edge3.target == "Node2"
-        results.add_pass(f"{test_name} - From Nodes")
-
-        # Test edge from node ids
-        edge4 = Edge.from_nodes("Node1", "Node2")
-        assert edge4.source == "Node1"
-        assert edge4.target == "Node2"
-        results.add_pass(f"{test_name} - From IDs")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_graph_workflow_initialization(results: TestResults):
-    """Test GraphWorkflow initialization with various parameters"""
-    test_name = "GraphWorkflow Initialization"
-
-    try:
-        # Test basic initialization
-        workflow = GraphWorkflow()
-        assert workflow.nodes == {}
-        assert workflow.edges == []
-        assert workflow.entry_points == []
-        assert workflow.end_points == []
-        assert workflow.max_loops == 1
-        assert workflow.auto_compile is True
-        results.add_pass(f"{test_name} - Basic")
-
-        # Test initialization with custom parameters
-        workflow2 = GraphWorkflow(
-            id="test-id",
-            name="Test Workflow",
-            description="Test description",
-            max_loops=5,
-            auto_compile=False,
-            verbose=True,
-        )
-        assert workflow2.id == "test-id"
-        assert workflow2.name == "Test Workflow"
-        assert workflow2.description == "Test description"
-        assert workflow2.max_loops == 5
-        assert workflow2.auto_compile is False
-        assert workflow2.verbose is True
-        results.add_pass(f"{test_name} - Custom Parameters")
-
-        # Test initialization with nodes and edges
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        node1 = Node.from_agent(agent1)
-        node2 = Node.from_agent(agent2)
-        edge = Edge(source="Agent1", target="Agent2")
-
-        workflow3 = GraphWorkflow(
-            nodes={"Agent1": node1, "Agent2": node2},
-            edges=[edge],
-            entry_points=["Agent1"],
-            end_points=["Agent2"],
-        )
-        assert len(workflow3.nodes) == 2
-        assert len(workflow3.edges) == 1
-        assert workflow3.entry_points == ["Agent1"]
-        assert workflow3.end_points == ["Agent2"]
-        results.add_pass(f"{test_name} - With Nodes and Edges")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_add_node(results: TestResults):
-    """Test adding nodes to the workflow"""
-    test_name = "Add Node"
-
-    try:
-        workflow = GraphWorkflow()
-
-        # Test adding a single node
-        agent = create_mock_agent("TestAgent")
-        workflow.add_node(agent)
-        assert "TestAgent" in workflow.nodes
-        assert workflow.nodes["TestAgent"].agent == agent
-        results.add_pass(f"{test_name} - Single Node")
-
-        # Test adding node with metadata - FIXED: pass metadata correctly
-        agent2 = create_mock_agent("TestAgent2")
-        workflow.add_node(
-            agent2, metadata={"priority": "high", "timeout": 30}
-        )
-        assert (
-            workflow.nodes["TestAgent2"].metadata["priority"]
-            == "high"
-        )
-        assert workflow.nodes["TestAgent2"].metadata["timeout"] == 30
-        results.add_pass(f"{test_name} - Node with Metadata")
-
-        # Test error case - duplicate node
-        try:
-            workflow.add_node(agent)
-            results.add_fail(
-                f"{test_name} - Duplicate validation",
-                "Should raise ValueError",
-            )
-        except ValueError:
-            results.add_pass(f"{test_name} - Duplicate validation")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_add_edge(results: TestResults):
-    """Test adding edges to the workflow"""
-    test_name = "Add Edge"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-
-        # Test adding edge by source and target
-        workflow.add_edge("Agent1", "Agent2")
-        assert len(workflow.edges) == 1
-        assert workflow.edges[0].source == "Agent1"
-        assert workflow.edges[0].target == "Agent2"
-        results.add_pass(f"{test_name} - Source Target")
-
-        # Test adding edge object
-        edge = Edge(
-            source="Agent2", target="Agent1", metadata={"weight": 2}
-        )
-        workflow.add_edge(edge)
-        assert len(workflow.edges) == 2
-        assert workflow.edges[1].metadata["weight"] == 2
-        results.add_pass(f"{test_name} - Edge Object")
-
-        # Test error case - invalid source
-        try:
-            workflow.add_edge("InvalidAgent", "Agent1")
-            results.add_fail(
-                f"{test_name} - Invalid source validation",
-                "Should raise ValueError",
-            )
-        except ValueError:
-            results.add_pass(
-                f"{test_name} - Invalid source validation"
-            )
-
-        # Test error case - invalid target
-        try:
-            workflow.add_edge("Agent1", "InvalidAgent")
-            results.add_fail(
-                f"{test_name} - Invalid target validation",
-                "Should raise ValueError",
-            )
-        except ValueError:
-            results.add_pass(
-                f"{test_name} - Invalid target validation"
-            )
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_add_edges_from_source(results: TestResults):
-    """Test adding multiple edges from a single source"""
-    test_name = "Add Edges From Source"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        agent3 = create_mock_agent("Agent3")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_node(agent3)
-
-        # Test fan-out pattern
-        edges = workflow.add_edges_from_source(
-            "Agent1", ["Agent2", "Agent3"]
-        )
-        assert len(edges) == 2
-        assert len(workflow.edges) == 2
-        assert all(edge.source == "Agent1" for edge in edges)
-        assert {edge.target for edge in edges} == {"Agent2", "Agent3"}
-        results.add_pass(f"{test_name} - Fan-out")
-
-        # Test with metadata - FIXED: pass metadata correctly
-        edges2 = workflow.add_edges_from_source(
-            "Agent2", ["Agent3"], metadata={"weight": 1.5}
-        )
-        assert edges2[0].metadata["weight"] == 1.5
-        results.add_pass(f"{test_name} - With Metadata")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_add_edges_to_target(results: TestResults):
-    """Test adding multiple edges to a single target"""
-    test_name = "Add Edges To Target"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        agent3 = create_mock_agent("Agent3")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_node(agent3)
-
-        # Test fan-in pattern
-        edges = workflow.add_edges_to_target(
-            ["Agent1", "Agent2"], "Agent3"
-        )
-        assert len(edges) == 2
-        assert len(workflow.edges) == 2
-        assert all(edge.target == "Agent3" for edge in edges)
-        assert {edge.source for edge in edges} == {"Agent1", "Agent2"}
-        results.add_pass(f"{test_name} - Fan-in")
-
-        # Test with metadata - FIXED: pass metadata correctly
-        edges2 = workflow.add_edges_to_target(
-            ["Agent1"], "Agent2", metadata={"priority": "high"}
-        )
-        assert edges2[0].metadata["priority"] == "high"
-        results.add_pass(f"{test_name} - With Metadata")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_add_parallel_chain(results: TestResults):
-    """Test adding parallel chain connections"""
-    test_name = "Add Parallel Chain"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        agent3 = create_mock_agent("Agent3")
-        agent4 = create_mock_agent("Agent4")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_node(agent3)
-        workflow.add_node(agent4)
-
-        # Test parallel chain
-        edges = workflow.add_parallel_chain(
-            ["Agent1", "Agent2"], ["Agent3", "Agent4"]
-        )
-        assert len(edges) == 4  # 2 sources * 2 targets
-        assert len(workflow.edges) == 4
-        results.add_pass(f"{test_name} - Parallel Chain")
-
-        # Test with metadata - FIXED: pass metadata correctly
-        edges2 = workflow.add_parallel_chain(
-            ["Agent1"], ["Agent2"], metadata={"batch_size": 10}
-        )
-        assert edges2[0].metadata["batch_size"] == 10
-        results.add_pass(f"{test_name} - With Metadata")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_set_entry_end_points(results: TestResults):
-    """Test setting entry and end points"""
-    test_name = "Set Entry/End Points"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-
-        # Test setting entry points
-        workflow.set_entry_points(["Agent1"])
-        assert workflow.entry_points == ["Agent1"]
-        results.add_pass(f"{test_name} - Entry Points")
-
-        # Test setting end points
-        workflow.set_end_points(["Agent2"])
-        assert workflow.end_points == ["Agent2"]
-        results.add_pass(f"{test_name} - End Points")
-
-        # Test error case - invalid entry point
-        try:
-            workflow.set_entry_points(["InvalidAgent"])
-            results.add_fail(
-                f"{test_name} - Invalid entry validation",
-                "Should raise ValueError",
-            )
-        except ValueError:
-            results.add_pass(
-                f"{test_name} - Invalid entry validation"
-            )
-
-        # Test error case - invalid end point
-        try:
-            workflow.set_end_points(["InvalidAgent"])
-            results.add_fail(
-                f"{test_name} - Invalid end validation",
-                "Should raise ValueError",
-            )
-        except ValueError:
-            results.add_pass(f"{test_name} - Invalid end validation")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_auto_set_entry_end_points(results: TestResults):
-    """Test automatic setting of entry and end points"""
-    test_name = "Auto Set Entry/End Points"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        agent3 = create_mock_agent("Agent3")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_node(agent3)
-
-        # Add edges to create a simple chain
-        workflow.add_edge("Agent1", "Agent2")
-        workflow.add_edge("Agent2", "Agent3")
-
-        # Test auto-setting entry points
-        workflow.auto_set_entry_points()
-        assert "Agent1" in workflow.entry_points
-        results.add_pass(f"{test_name} - Auto Entry Points")
-
-        # Test auto-setting end points
-        workflow.auto_set_end_points()
-        assert "Agent3" in workflow.end_points
-        results.add_pass(f"{test_name} - Auto End Points")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_compile(results: TestResults):
-    """Test workflow compilation"""
-    test_name = "Compile"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test compilation
-        workflow.compile()
-        assert workflow._compiled is True
-        assert len(workflow._sorted_layers) > 0
-        assert workflow._compilation_timestamp is not None
-        results.add_pass(f"{test_name} - Basic Compilation")
-
-        # Test compilation caching
-        original_timestamp = workflow._compilation_timestamp
-        workflow.compile()  # Should not recompile
-        assert workflow._compilation_timestamp == original_timestamp
-        results.add_pass(f"{test_name} - Compilation Caching")
-
-        # Test compilation invalidation
-        workflow.add_node(create_mock_agent("Agent3"))
-        assert workflow._compiled is False  # Should be invalidated
-        results.add_pass(f"{test_name} - Compilation Invalidation")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_from_spec(results: TestResults):
-    """Test creating workflow from specification"""
-    test_name = "From Spec"
-
-    try:
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        agent3 = create_mock_agent("Agent3")
-
-        # Test basic from_spec
-        workflow = GraphWorkflow.from_spec(
-            agents=[agent1, agent2, agent3],
-            edges=[("Agent1", "Agent2"), ("Agent2", "Agent3")],
-            task="Test task",
-        )
-        assert len(workflow.nodes) == 3
-        assert len(workflow.edges) == 2
-        assert workflow.task == "Test task"
-        results.add_pass(f"{test_name} - Basic")
 
-        # Test with fan-out pattern
-        workflow2 = GraphWorkflow.from_spec(
-            agents=[agent1, agent2, agent3],
-            edges=[("Agent1", ["Agent2", "Agent3"])],
-            verbose=True,
-        )
-        assert len(workflow2.edges) == 2
-        results.add_pass(f"{test_name} - Fan-out")
 
-        # Test with fan-in pattern
-        workflow3 = GraphWorkflow.from_spec(
-            agents=[agent1, agent2, agent3],
-            edges=[(["Agent1", "Agent2"], "Agent3")],
-            verbose=True,
-        )
-        assert len(workflow3.edges) == 2
-        results.add_pass(f"{test_name} - Fan-in")
+def test_graph_workflow_basic_node_creation():
+    """Test basic GraphWorkflow node creation with real agents"""
+    # Test basic node creation
+    agent = create_test_agent("TestAgent", "Test agent for node creation")
+    node = Node.from_agent(agent)
+    assert node.id == "TestAgent"
+    assert node.type == NodeType.AGENT
+    assert node.agent == agent
 
-        # Test with parallel chain - FIXED: avoid cycles
-        workflow4 = GraphWorkflow.from_spec(
-            agents=[agent1, agent2, agent3],
-            edges=[
-                (["Agent1", "Agent2"], ["Agent3"])
-            ],  # Fixed: no self-loops
-            verbose=True,
-        )
-        assert len(workflow4.edges) == 2
-        results.add_pass(f"{test_name} - Parallel Chain")
+    # Test node with custom id
+    node2 = Node(id="CustomID", type=NodeType.AGENT, agent=agent)
+    assert node2.id == "CustomID"
 
-    except Exception as e:
-        results.add_fail(test_name, str(e))
 
+def test_graph_workflow_multi_agent_collaboration():
+    """Test GraphWorkflow with multiple agents in a collaboration scenario"""
+    # Create specialized agents for a business analysis workflow
+    market_researcher = create_test_agent(
+        "Market-Researcher",
+        "Specialist in market analysis and trend identification"
+    )
 
-def test_run_execution(results: TestResults):
-    """Test workflow execution"""
-    test_name = "Run Execution"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test basic execution
-        results_dict = workflow.run(task="Test task")
-        assert len(results_dict) == 2
-        assert "Agent1" in results_dict
-        assert "Agent2" in results_dict
-        results.add_pass(f"{test_name} - Basic Execution")
-
-        # Test execution with custom task
-        workflow.run(task="Custom task")
-        assert workflow.task == "Custom task"
-        results.add_pass(f"{test_name} - Custom Task")
-
-        # Test execution with max_loops
-        workflow.max_loops = 2
-        results_dict3 = workflow.run(task="Multi-loop task")
-        # Should still return after first loop for backward compatibility
-        assert len(results_dict3) == 2
-        results.add_pass(f"{test_name} - Multi-loop")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_async_run(results: TestResults):
-    """Test async workflow execution"""
-    test_name = "Async Run"
-
-    try:
-        import asyncio
-
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test async execution
-        async def test_async():
-            results_dict = await workflow.arun(task="Async task")
-            assert len(results_dict) == 2
-            return results_dict
-
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        try:
-            results_dict = loop.run_until_complete(test_async())
-            assert "Agent1" in results_dict
-            assert "Agent2" in results_dict
-            results.add_pass(f"{test_name} - Async Execution")
-        finally:
-            loop.close()
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_visualize_simple(results: TestResults):
-    """Test simple visualization"""
-    test_name = "Visualize Simple"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test simple visualization
-        viz_output = workflow.visualize_simple()
-        assert "GraphWorkflow" in viz_output
-        assert "Agent1" in viz_output
-        assert "Agent2" in viz_output
-        assert "Agent1 → Agent2" in viz_output
-        results.add_pass(f"{test_name} - Basic")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_visualize_graphviz(results: TestResults):
-    """Test Graphviz visualization"""
-    test_name = "Visualize Graphviz"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test Graphviz visualization (if available)
-        try:
-            output_file = workflow.visualize(format="png", view=False)
-            assert output_file.endswith(".png")
-            results.add_pass(f"{test_name} - PNG Format")
-        except ImportError:
-            results.add_pass(f"{test_name} - Graphviz not available")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_to_json(results: TestResults):
-    """Test JSON serialization"""
-    test_name = "To JSON"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test basic JSON serialization
-        json_str = workflow.to_json()
-        data = json.loads(json_str)
-        assert data["name"] == workflow.name
-        assert len(data["nodes"]) == 2
-        assert len(data["edges"]) == 1
-        results.add_pass(f"{test_name} - Basic")
-
-        # Test JSON with conversation
-        json_str2 = workflow.to_json(include_conversation=True)
-        data2 = json.loads(json_str2)
-        assert "conversation" in data2
-        results.add_pass(f"{test_name} - With Conversation")
-
-        # Test JSON with runtime state
-        workflow.compile()
-        json_str3 = workflow.to_json(include_runtime_state=True)
-        data3 = json.loads(json_str3)
-        assert "runtime_state" in data3
-        assert data3["runtime_state"]["is_compiled"] is True
-        results.add_pass(f"{test_name} - With Runtime State")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_from_json(results: TestResults):
-    """Test JSON deserialization"""
-    test_name = "From JSON"
-
-    try:
-        # Create original workflow
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Serialize to JSON
-        json_str = workflow.to_json()
-
-        # Deserialize from JSON - FIXED: handle agent reconstruction
-        try:
-            workflow2 = GraphWorkflow.from_json(json_str)
-            assert workflow2.name == workflow.name
-            assert len(workflow2.nodes) == 2
-            assert len(workflow2.edges) == 1
-            results.add_pass(f"{test_name} - Basic")
-        except Exception as e:
-            # If deserialization fails due to agent reconstruction, that's expected
-            # since we can't fully reconstruct agents from JSON
-            if "does not exist" in str(e) or "NodeType" in str(e):
-                results.add_pass(
-                    f"{test_name} - Basic (expected partial failure)"
-                )
-            else:
-                raise e
-
-        # Test with runtime state restoration
-        workflow.compile()
-        json_str2 = workflow.to_json(include_runtime_state=True)
-        try:
-            workflow3 = GraphWorkflow.from_json(
-                json_str2, restore_runtime_state=True
-            )
-            assert workflow3._compiled is True
-            results.add_pass(f"{test_name} - With Runtime State")
-        except Exception as e:
-            # Same handling for expected partial failures
-            if "does not exist" in str(e) or "NodeType" in str(e):
-                results.add_pass(
-                    f"{test_name} - With Runtime State (expected partial failure)"
-                )
-            else:
-                raise e
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_save_load_file(results: TestResults):
-    """Test saving and loading from file"""
-    test_name = "Save/Load File"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test saving to file
-        with tempfile.NamedTemporaryFile(
-            suffix=".json", delete=False
-        ) as tmp_file:
-            filepath = tmp_file.name
-
-        try:
-            saved_path = workflow.save_to_file(filepath)
-            assert os.path.exists(saved_path)
-            results.add_pass(f"{test_name} - Save")
-
-            # Test loading from file
-            try:
-                loaded_workflow = GraphWorkflow.load_from_file(
-                    filepath
-                )
-                assert loaded_workflow.name == workflow.name
-                assert len(loaded_workflow.nodes) == 2
-                assert len(loaded_workflow.edges) == 1
-                results.add_pass(f"{test_name} - Load")
-            except Exception as e:
-                # Handle expected partial failures
-                if "does not exist" in str(e) or "NodeType" in str(e):
-                    results.add_pass(
-                        f"{test_name} - Load (expected partial failure)"
-                    )
-                else:
-                    raise e
-
-        finally:
-            if os.path.exists(filepath):
-                os.unlink(filepath)
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_export_summary(results: TestResults):
-    """Test export summary functionality"""
-    test_name = "Export Summary"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test summary export
-        summary = workflow.export_summary()
-        assert "workflow_info" in summary
-        assert "structure" in summary
-        assert "configuration" in summary
-        assert "compilation_status" in summary
-        assert "agents" in summary
-        assert "connections" in summary
-        assert summary["structure"]["nodes"] == 2
-        assert summary["structure"]["edges"] == 1
-        results.add_pass(f"{test_name} - Basic")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_get_compilation_status(results: TestResults):
-    """Test compilation status retrieval"""
-    test_name = "Get Compilation Status"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_edge("Agent1", "Agent2")
-
-        # Test status before compilation
-        status1 = workflow.get_compilation_status()
-        assert status1["is_compiled"] is False
-        assert status1["cached_layers_count"] == 0
-        results.add_pass(f"{test_name} - Before Compilation")
-
-        # Test status after compilation
-        workflow.compile()
-        status2 = workflow.get_compilation_status()
-        assert status2["is_compiled"] is True
-        assert status2["cached_layers_count"] > 0
-        assert status2["compilation_timestamp"] is not None
-        results.add_pass(f"{test_name} - After Compilation")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_error_handling(results: TestResults):
-    """Test various error conditions"""
-    test_name = "Error Handling"
-
-    try:
-        # Test invalid JSON
-        try:
-            GraphWorkflow.from_json("invalid json")
-            results.add_fail(
-                f"{test_name} - Invalid JSON",
-                "Should raise ValueError",
-            )
-        except (ValueError, json.JSONDecodeError):
-            results.add_pass(f"{test_name} - Invalid JSON")
-
-        # Test file not found
-        try:
-            GraphWorkflow.load_from_file("nonexistent_file.json")
-            results.add_fail(
-                f"{test_name} - File not found",
-                "Should raise FileNotFoundError",
-            )
-        except FileNotFoundError:
-            results.add_pass(f"{test_name} - File not found")
-
-        # Test save to invalid path
-        workflow = GraphWorkflow()
-        try:
-            workflow.save_to_file("/invalid/path/workflow.json")
-            results.add_fail(
-                f"{test_name} - Invalid save path",
-                "Should raise exception",
-            )
-        except (OSError, PermissionError):
-            results.add_pass(f"{test_name} - Invalid save path")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_performance_optimizations(results: TestResults):
-    """Test performance optimization features"""
-    test_name = "Performance Optimizations"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        agent3 = create_mock_agent("Agent3")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_node(agent3)
-        workflow.add_edge("Agent1", "Agent2")
-        workflow.add_edge("Agent2", "Agent3")
-
-        # Test compilation caching
-        start_time = time.time()
-        workflow.compile()
-        first_compile_time = time.time() - start_time
-
-        start_time = time.time()
-        workflow.compile()  # Should use cache
-        second_compile_time = time.time() - start_time
-
-        assert second_compile_time < first_compile_time
-        results.add_pass(f"{test_name} - Compilation Caching")
-
-        # Test predecessor caching
-        workflow._get_predecessors("Agent2")  # First call
-        start_time = time.time()
-        workflow._get_predecessors("Agent2")  # Cached call
-        cached_time = time.time() - start_time
-        assert cached_time < 0.001  # Should be very fast
-        results.add_pass(f"{test_name} - Predecessor Caching")
-
-    except Exception as e:
-        results.add_fail(test_name, str(e))
-
-
-def test_concurrent_execution(results: TestResults):
-    """Test concurrent execution features"""
-    test_name = "Concurrent Execution"
-
-    try:
-        workflow = GraphWorkflow()
-        agent1 = create_mock_agent("Agent1")
-        agent2 = create_mock_agent("Agent2")
-        agent3 = create_mock_agent("Agent3")
-        workflow.add_node(agent1)
-        workflow.add_node(agent2)
-        workflow.add_node(agent3)
-
-        # Test parallel execution with fan-out
-        workflow.add_edges_from_source("Agent1", ["Agent2", "Agent3"])
+    data_analyst = create_test_agent(
+        "Data-Analyst",
+        "Expert in data processing and statistical analysis"
+    )
 
-        # Mock agents to simulate different execution times
-        def slow_run(prompt, *args, **kwargs):
-            time.sleep(0.1)  # Simulate work
-            return f"Output from {prompt[:10]}"
+    strategy_consultant = create_test_agent(
+        "Strategy-Consultant",
+        "Senior consultant for strategic planning and recommendations"
+    )
 
-        agent2.run = Mock(side_effect=slow_run)
-        agent3.run = Mock(side_effect=slow_run)
+    # Create workflow with linear execution path
+    workflow = GraphWorkflow(name="Business-Analysis-Workflow")
+    workflow.add_node(market_researcher)
+    workflow.add_node(data_analyst)
+    workflow.add_node(strategy_consultant)
 
-        start_time = time.time()
-        results_dict = workflow.run(task="Test concurrent execution")
-        execution_time = time.time() - start_time
+    # Add edges to define execution order
+    workflow.add_edge("Market-Researcher", "Data-Analyst")
+    workflow.add_edge("Data-Analyst", "Strategy-Consultant")
 
-        # Should be faster than sequential execution (0.2s vs 0.1s)
-        assert execution_time < 0.15
-        assert len(results_dict) == 3
-        results.add_pass(f"{test_name} - Parallel Execution")
+    # Test workflow execution
+    result = workflow.run("Analyze market opportunities for AI in healthcare")
+    assert result is not None
 
-    except Exception as e:
-        results.add_fail(test_name, str(e))
 
+def test_graph_workflow_parallel_execution():
+    """Test GraphWorkflow with parallel execution paths"""
+    # Create agents for parallel analysis
+    technical_analyst = create_test_agent(
+        "Technical-Analyst",
+        "Technical feasibility and implementation analysis"
+    )
 
-def test_complex_workflow_patterns(results: TestResults):
-    """Test complex workflow patterns"""
-    test_name = "Complex Workflow Patterns"
+    market_analyst = create_test_agent(
+        "Market-Analyst",
+        "Market positioning and competitive analysis"
+    )
 
-    try:
-        # Create a complex workflow with multiple patterns
-        workflow = GraphWorkflow(name="Complex Test Workflow")
+    financial_analyst = create_test_agent(
+        "Financial-Analyst",
+        "Financial modeling and ROI analysis"
+    )
 
-        # Create agents
-        agents = [create_mock_agent(f"Agent{i}") for i in range(1, 7)]
-        for agent in agents:
-            workflow.add_node(agent)
+    risk_assessor = create_test_agent(
+        "Risk-Assessor",
+        "Risk assessment and mitigation planning"
+    )
 
-        # Create complex pattern: fan-out -> parallel -> fan-in
-        workflow.add_edges_from_source(
-            "Agent1", ["Agent2", "Agent3", "Agent4"]
-        )
-        workflow.add_parallel_chain(
-            ["Agent2", "Agent3"], ["Agent4", "Agent5"]
-        )
-        workflow.add_edges_to_target(["Agent4", "Agent5"], "Agent6")
+    # Create workflow with parallel execution
+    workflow = GraphWorkflow(name="Parallel-Analysis-Workflow")
+    workflow.add_node(technical_analyst)
+    workflow.add_node(market_analyst)
+    workflow.add_node(financial_analyst)
+    workflow.add_node(risk_assessor)
 
-        # Test compilation
-        workflow.compile()
-        assert workflow._compiled is True
-        assert len(workflow._sorted_layers) > 0
-        results.add_pass(f"{test_name} - Complex Structure")
+    # Add edges for fan-out execution (one to many)
+    workflow.add_edges_from_source("Technical-Analyst", ["Market-Analyst", "Financial-Analyst", "Risk-Assessor"])
 
-        # Test execution
-        results_dict = workflow.run(task="Complex pattern test")
-        assert len(results_dict) == 6
-        results.add_pass(f"{test_name} - Complex Execution")
+    # Test parallel execution
+    result = workflow.run("Evaluate feasibility of launching a new fintech platform")
+    assert result is not None
 
-        # Test visualization
-        viz_output = workflow.visualize_simple()
-        assert "Complex Test Workflow" in viz_output
-        assert (
-            "Fan-out patterns" in viz_output
-            or "Fan-in patterns" in viz_output
-        )
-        results.add_pass(f"{test_name} - Complex Visualization")
 
-    except Exception as e:
-        results.add_fail(test_name, str(e))
+def test_graph_workflow_complex_topology():
+    """Test GraphWorkflow with complex node topology"""
+    # Create agents for a comprehensive product development workflow
+    product_manager = create_test_agent(
+        "Product-Manager",
+        "Product strategy and roadmap management"
+    )
 
+    ux_designer = create_test_agent(
+        "UX-Designer",
+        "User experience design and research"
+    )
 
-def run_all_tests():
-    """Run all tests and return results"""
-    print("Starting Comprehensive GraphWorkflow Test Suite")
-    print("=" * 60)
+    backend_developer = create_test_agent(
+        "Backend-Developer",
+        "Backend system architecture and development"
+    )
 
-    results = TestResults()
+    frontend_developer = create_test_agent(
+        "Frontend-Developer",
+        "Frontend interface and user interaction development"
+    )
 
-    # Run all test functions
-    test_functions = [
-        test_node_creation,
-        test_edge_creation,
-        test_graph_workflow_initialization,
-        test_add_node,
-        test_add_edge,
-        test_add_edges_from_source,
-        test_add_edges_to_target,
-        test_add_parallel_chain,
-        test_set_entry_end_points,
-        test_auto_set_entry_end_points,
-        test_compile,
-        test_from_spec,
-        test_run_execution,
-        test_async_run,
-        test_visualize_simple,
-        test_visualize_graphviz,
-        test_to_json,
-        test_from_json,
-        test_save_load_file,
-        test_export_summary,
-        test_get_compilation_status,
-        test_error_handling,
-        test_performance_optimizations,
-        test_concurrent_execution,
-        test_complex_workflow_patterns,
-    ]
+    qa_engineer = create_test_agent(
+        "QA-Engineer",
+        "Quality assurance and testing specialist"
+    )
 
-    for test_func in test_functions:
-        try:
-            test_func(results)
-        except Exception as e:
-            results.add_fail(
-                test_func.__name__, f"Test function failed: {str(e)}"
-            )
+    devops_engineer = create_test_agent(
+        "DevOps-Engineer",
+        "Deployment and infrastructure management"
+    )
 
-    # Print summary
-    results.print_summary()
+    # Create workflow with complex dependencies
+    workflow = GraphWorkflow(name="Product-Development-Workflow")
+    workflow.add_node(product_manager)
+    workflow.add_node(ux_designer)
+    workflow.add_node(backend_developer)
+    workflow.add_node(frontend_developer)
+    workflow.add_node(qa_engineer)
+    workflow.add_node(devops_engineer)
+
+    # Define complex execution topology
+    workflow.add_edge("Product-Manager", "UX-Designer")
+    workflow.add_edge("UX-Designer", "Frontend-Developer")
+    workflow.add_edge("Product-Manager", "Backend-Developer")
+    workflow.add_edge("Backend-Developer", "QA-Engineer")
+    workflow.add_edge("Frontend-Developer", "QA-Engineer")
+    workflow.add_edge("QA-Engineer", "DevOps-Engineer")
+
+    # Test complex workflow execution
+    result = workflow.run("Develop a comprehensive e-commerce platform with AI recommendations")
+    assert result is not None
+
+
+def test_graph_workflow_error_handling():
+    """Test GraphWorkflow error handling and validation"""
+    # Test with empty workflow
+    workflow = GraphWorkflow()
+    result = workflow.run("Test task")
+    # Empty workflow should handle gracefully
+    assert result is not None
+
+    # Test workflow compilation and caching
+    researcher = create_test_agent("Researcher", "Research specialist")
+    workflow.add_node(researcher)
+
+    # First run should compile
+    result1 = workflow.run("Research task")
+    assert result1 is not None
+
+    # Second run should use cached compilation
+    result2 = workflow.run("Another research task")
+    assert result2 is not None
+
+
+def test_graph_workflow_node_metadata():
+    """Test GraphWorkflow with node metadata"""
+    # Create agents with different priorities and requirements
+    high_priority_agent = create_test_agent(
+        "High-Priority-Analyst",
+        "High priority analysis specialist"
+    )
 
-    return results
+    standard_agent = create_test_agent(
+        "Standard-Analyst",
+        "Standard analysis agent"
+    )
 
+    # Create workflow and add nodes with metadata
+    workflow = GraphWorkflow(name="Metadata-Workflow")
+    workflow.add_node(high_priority_agent, metadata={"priority": "high", "timeout": 60})
+    workflow.add_node(standard_agent, metadata={"priority": "normal", "timeout": 30})
 
-if __name__ == "__main__":
-    results = run_all_tests()
+    # Add execution dependency
+    workflow.add_edge("High-Priority-Analyst", "Standard-Analyst")
 
-    # Exit with appropriate code
-    if results.failed > 0:
-        sys.exit(1)
-    else:
-        sys.exit(0)
+    # Test execution with metadata
+    result = workflow.run("Analyze business requirements with different priorities")
+    assert result is not None
diff --git a/tests/structs/test_hierarchical_swarm.py b/tests/structs/test_hierarchical_swarm.py
new file mode 100644
index 00000000..7f56b280
--- /dev/null
+++ b/tests/structs/test_hierarchical_swarm.py
@@ -0,0 +1,328 @@
+from swarms import Agent
+from swarms.structs.hiearchical_swarm import HierarchicalSwarm
+
+
+def test_hierarchical_swarm_basic_initialization():
+    """Test basic HierarchicalSwarm initialization"""
+    # Create worker agents
+    research_agent = Agent(
+        agent_name="Research-Specialist",
+        agent_description="Specialist in research and data collection",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    analysis_agent = Agent(
+        agent_name="Analysis-Expert",
+        agent_description="Expert in data analysis and insights",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    implementation_agent = Agent(
+        agent_name="Implementation-Manager",
+        agent_description="Manager for implementation and execution",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create swarm with agents
+    swarm = HierarchicalSwarm(
+        name="Research-Analysis-Implementation-Swarm",
+        description="Hierarchical swarm for comprehensive project execution",
+        agents=[research_agent, analysis_agent, implementation_agent],
+        max_loops=1,
+    )
+
+    # Verify initialization
+    assert swarm.name == "Research-Analysis-Implementation-Swarm"
+    assert swarm.description == "Hierarchical swarm for comprehensive project execution"
+    assert len(swarm.agents) == 3
+    assert swarm.max_loops == 1
+    assert swarm.director is not None
+
+
+def test_hierarchical_swarm_with_director():
+    """Test HierarchicalSwarm with custom director"""
+    # Create a custom director
+    director = Agent(
+        agent_name="Project-Director",
+        agent_description="Senior project director with extensive experience",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create worker agents
+    developer = Agent(
+        agent_name="Senior-Developer",
+        agent_description="Senior software developer",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    tester = Agent(
+        agent_name="QA-Lead",
+        agent_description="Quality assurance lead",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create swarm with custom director
+    swarm = HierarchicalSwarm(
+        name="Software-Development-Swarm",
+        description="Hierarchical swarm for software development projects",
+        director=director,
+        agents=[developer, tester],
+        max_loops=2,
+    )
+
+    assert swarm.director == director
+    assert len(swarm.agents) == 2
+    assert swarm.max_loops == 2
+
+
+def test_hierarchical_swarm_execution():
+    """Test HierarchicalSwarm execution with multiple agents"""
+    # Create specialized agents
+    market_researcher = Agent(
+        agent_name="Market-Researcher",
+        agent_description="Market research specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    product_strategist = Agent(
+        agent_name="Product-Strategist",
+        agent_description="Product strategy and planning expert",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    technical_architect = Agent(
+        agent_name="Technical-Architect",
+        agent_description="Technical architecture and design specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    risk_analyst = Agent(
+        agent_name="Risk-Analyst",
+        agent_description="Risk assessment and mitigation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create hierarchical swarm
+    swarm = HierarchicalSwarm(
+        name="Product-Development-Swarm",
+        description="Comprehensive product development hierarchical swarm",
+        agents=[market_researcher, product_strategist, technical_architect, risk_analyst],
+        max_loops=1,
+        verbose=True,
+    )
+
+    # Execute swarm
+    result = swarm.run("Develop a comprehensive strategy for a new AI-powered healthcare platform")
+
+    # Verify result structure
+    assert result is not None
+    # HierarchicalSwarm returns a SwarmSpec or conversation history, just ensure it's not None
+
+
+def test_hierarchical_swarm_multiple_loops():
+    """Test HierarchicalSwarm with multiple feedback loops"""
+    # Create agents for iterative refinement
+    planner = Agent(
+        agent_name="Strategic-Planner",
+        agent_description="Strategic planning and project management",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    executor = Agent(
+        agent_name="Task-Executor",
+        agent_description="Task execution and implementation",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    reviewer = Agent(
+        agent_name="Quality-Reviewer",
+        agent_description="Quality assurance and review specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create swarm with multiple loops for iterative refinement
+    swarm = HierarchicalSwarm(
+        name="Iterative-Development-Swarm",
+        description="Hierarchical swarm with iterative feedback loops",
+        agents=[planner, executor, reviewer],
+        max_loops=3,  # Allow multiple iterations
+        verbose=True,
+    )
+
+    # Execute with multiple loops
+    result = swarm.run("Create a detailed project plan for implementing a machine learning recommendation system")
+
+    assert result is not None
+
+
+def test_hierarchical_swarm_error_handling():
+    """Test HierarchicalSwarm error handling"""
+    # Test with empty agents list
+    try:
+        swarm = HierarchicalSwarm(agents=[])
+        assert False, "Should have raised ValueError for empty agents list"
+    except ValueError as e:
+        assert "agents" in str(e).lower() or "empty" in str(e).lower()
+
+    # Test with invalid max_loops
+    researcher = Agent(
+        agent_name="Test-Researcher",
+        agent_description="Test researcher",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    try:
+        swarm = HierarchicalSwarm(agents=[researcher], max_loops=0)
+        assert False, "Should have raised ValueError for invalid max_loops"
+    except ValueError as e:
+        assert "max_loops" in str(e).lower() or "0" in str(e)
+
+
+def test_hierarchical_swarm_collaboration_prompts():
+    """Test HierarchicalSwarm with collaboration prompts enabled"""
+    # Create agents
+    data_analyst = Agent(
+        agent_name="Data-Analyst",
+        agent_description="Data analysis specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    business_analyst = Agent(
+        agent_name="Business-Analyst",
+        agent_description="Business analysis specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create swarm with collaboration prompts
+    swarm = HierarchicalSwarm(
+        name="Collaborative-Analysis-Swarm",
+        description="Hierarchical swarm with enhanced collaboration",
+        agents=[data_analyst, business_analyst],
+        max_loops=1,
+        add_collaboration_prompt=True,
+    )
+
+    # Check that collaboration prompts were added to agents
+    assert data_analyst.system_prompt is not None
+    assert business_analyst.system_prompt is not None
+
+    # Execute swarm
+    result = swarm.run("Analyze customer behavior patterns and provide business recommendations")
+    assert result is not None
+
+
+def test_hierarchical_swarm_with_dashboard():
+    """Test HierarchicalSwarm with interactive dashboard"""
+    # Create agents
+    content_creator = Agent(
+        agent_name="Content-Creator",
+        agent_description="Content creation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    editor = Agent(
+        agent_name="Editor",
+        agent_description="Content editor and proofreader",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    publisher = Agent(
+        agent_name="Publisher",
+        agent_description="Publishing and distribution specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create swarm with interactive dashboard
+    swarm = HierarchicalSwarm(
+        name="Content-Publishing-Swarm",
+        description="Hierarchical swarm for content creation and publishing",
+        agents=[content_creator, editor, publisher],
+        max_loops=1,
+        interactive=True,
+        verbose=True,
+    )
+
+    # Verify dashboard was created
+    assert swarm.dashboard is not None
+    assert swarm.interactive is True
+
+    # Execute swarm
+    result = swarm.run("Create a comprehensive guide on machine learning best practices")
+    assert result is not None
+
+
+def test_hierarchical_swarm_real_world_scenario():
+    """Test HierarchicalSwarm in a realistic business scenario"""
+    # Create agents representing different business functions
+    market_intelligence = Agent(
+        agent_name="Market-Intelligence-Director",
+        agent_description="Director of market intelligence and competitive analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    product_strategy = Agent(
+        agent_name="Product-Strategy-Manager",
+        agent_description="Product strategy and roadmap manager",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    engineering_lead = Agent(
+        agent_name="Engineering-Lead",
+        agent_description="Senior engineering lead and technical architect",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    operations_manager = Agent(
+        agent_name="Operations-Manager",
+        agent_description="Operations and implementation manager",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    compliance_officer = Agent(
+        agent_name="Compliance-Officer",
+        agent_description="Legal compliance and regulatory specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create comprehensive hierarchical swarm
+    swarm = HierarchicalSwarm(
+        name="Enterprise-Strategy-Swarm",
+        description="Enterprise-level strategic planning and execution swarm",
+        agents=[market_intelligence, product_strategy, engineering_lead, operations_manager, compliance_officer],
+        max_loops=2,
+        verbose=True,
+        add_collaboration_prompt=True,
+    )
+
+    # Test with complex enterprise scenario
+    result = swarm.run(
+        "Develop a comprehensive 5-year strategic plan for our company to become a leader in "
+        "AI-powered enterprise solutions. Consider market opportunities, competitive landscape, "
+        "technical requirements, operational capabilities, and regulatory compliance."
+    )
+
+    assert result is not None
diff --git a/tests/test_main_features.py b/tests/structs/test_main_features.py
similarity index 99%
rename from tests/test_main_features.py
rename to tests/structs/test_main_features.py
index 5d742774..ba5ca392 100644
--- a/tests/test_main_features.py
+++ b/tests/structs/test_main_features.py
@@ -105,6 +105,7 @@ def create_test_agent(
 
 # --- Basic Agent Tests ---
 
+
 def test_basic_agent_functionality():
     """Test basic agent creation and execution"""
     agent = create_test_agent("BasicAgent")
diff --git a/tests/structs/test_majority_voting.py b/tests/structs/test_majority_voting.py
index dcd25f0b..29b9083f 100644
--- a/tests/structs/test_majority_voting.py
+++ b/tests/structs/test_majority_voting.py
@@ -1,152 +1,198 @@
-from unittest.mock import MagicMock
-
 import pytest
-
 from swarms.structs.agent import Agent
 from swarms.structs.majority_voting import MajorityVoting
 
 
-def test_majority_voting_run_concurrent(mocker):
-    # Create mock agents
-    agent1 = MagicMock(spec=Agent)
-    agent2 = MagicMock(spec=Agent)
-    agent3 = MagicMock(spec=Agent)
+def test_majority_voting_basic_execution():
+    """Test basic MajorityVoting execution with multiple agents"""
+    # Create specialized agents with different perspectives
+    geographer = Agent(
+        agent_name="Geography-Expert",
+        agent_description="Expert in geography and world capitals",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-    # Create mock majority voting
-    mv = MajorityVoting(
-        agents=[agent1, agent2, agent3],
-        concurrent=True,
-        multithreaded=False,
+    historian = Agent(
+        agent_name="History-Scholar",
+        agent_description="Historical and cultural context specialist",
+        model_name="gpt-4o",
+        max_loops=1,
     )
 
-    # Create mock conversation
-    conversation = MagicMock()
-    mv.conversation = conversation
+    political_analyst = Agent(
+        agent_name="Political-Analyst",
+        agent_description="Political and administrative specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-    # Create mock results
-    results = ["Paris", "Paris", "Lyon"]
+    # Create majority voting system
+    mv = MajorityVoting(
+        name="Geography-Consensus-System",
+        description="Majority voting system for geographical questions",
+        agents=[geographer, historian, political_analyst],
+        max_loops=1,
+        verbose=True,
+    )
 
-    # Mock agent.run method
-    agent1.run.return_value = results[0]
-    agent2.run.return_value = results[1]
-    agent3.run.return_value = results[2]
+    # Test execution
+    result = mv.run("What is the capital city of France?")
+    assert result is not None
 
-    # Run majority voting
-    majority_vote = mv.run("What is the capital of France?")
 
-    # Assert agent.run method was called with the correct task
-    agent1.run.assert_called_once_with(
-        "What is the capital of France?"
+def test_majority_voting_multiple_loops():
+    """Test MajorityVoting with multiple loops for consensus refinement"""
+    # Create agents with different knowledge bases
+    trivia_expert = Agent(
+        agent_name="Trivia-Expert",
+        agent_description="General knowledge and trivia specialist",
+        model_name="gpt-4o",
+        max_loops=1,
     )
-    agent2.run.assert_called_once_with(
-        "What is the capital of France?"
-    )
-    agent3.run.assert_called_once_with(
-        "What is the capital of France?"
-    )
-
-    # Assert conversation.add method was called with the correct responses
-    conversation.add.assert_any_call(agent1.agent_name, results[0])
-    conversation.add.assert_any_call(agent2.agent_name, results[1])
-    conversation.add.assert_any_call(agent3.agent_name, results[2])
-
-    # Assert majority vote is correct
-    assert majority_vote is not None
 
+    research_analyst = Agent(
+        agent_name="Research-Analyst",
+        agent_description="Research and fact-checking specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-def test_majority_voting_run_multithreaded(mocker):
-    # Create mock agents
-    agent1 = MagicMock(spec=Agent)
-    agent2 = MagicMock(spec=Agent)
-    agent3 = MagicMock(spec=Agent)
+    subject_matter_expert = Agent(
+        agent_name="Subject-Matter-Expert",
+        agent_description="Deep subject matter expertise specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-    # Create mock majority voting
+    # Create majority voting with multiple loops for iterative refinement
     mv = MajorityVoting(
-        agents=[agent1, agent2, agent3],
-        concurrent=False,
-        multithreaded=True,
+        name="Multi-Loop-Consensus-System",
+        description="Majority voting with iterative consensus refinement",
+        agents=[trivia_expert, research_analyst, subject_matter_expert],
+        max_loops=3,  # Allow multiple iterations
+        verbose=True,
     )
 
-    # Create mock conversation
-    conversation = MagicMock()
-    mv.conversation = conversation
+    # Test multi-loop execution
+    result = mv.run("What are the main causes of climate change and what can be done to mitigate them?")
+    assert result is not None
 
-    # Create mock results
-    results = ["Paris", "Paris", "Lyon"]
 
-    # Mock agent.run method
-    agent1.run.return_value = results[0]
-    agent2.run.return_value = results[1]
-    agent3.run.return_value = results[2]
-
-    # Run majority voting
-    majority_vote = mv.run("What is the capital of France?")
-
-    # Assert agent.run method was called with the correct task
-    agent1.run.assert_called_once_with(
-        "What is the capital of France?"
-    )
-    agent2.run.assert_called_once_with(
-        "What is the capital of France?"
-    )
-    agent3.run.assert_called_once_with(
-        "What is the capital of France?"
+def test_majority_voting_business_scenario():
+    """Test MajorityVoting in a realistic business scenario"""
+    # Create agents representing different business perspectives
+    market_strategist = Agent(
+        agent_name="Market-Strategist",
+        agent_description="Market strategy and competitive analysis specialist",
+        model_name="gpt-4o",
+        max_loops=1,
     )
 
-    # Assert conversation.add method was called with the correct responses
-    conversation.add.assert_any_call(agent1.agent_name, results[0])
-    conversation.add.assert_any_call(agent2.agent_name, results[1])
-    conversation.add.assert_any_call(agent3.agent_name, results[2])
+    financial_analyst = Agent(
+        agent_name="Financial-Analyst",
+        agent_description="Financial modeling and ROI analysis specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-    # Assert majority vote is correct
-    assert majority_vote is not None
+    technical_architect = Agent(
+        agent_name="Technical-Architect",
+        agent_description="Technical feasibility and implementation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    risk_manager = Agent(
+        agent_name="Risk-Manager",
+        agent_description="Risk assessment and compliance specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-@pytest.mark.asyncio
-async def test_majority_voting_run_asynchronous(mocker):
-    # Create mock agents
-    agent1 = MagicMock(spec=Agent)
-    agent2 = MagicMock(spec=Agent)
-    agent3 = MagicMock(spec=Agent)
+    operations_expert = Agent(
+        agent_name="Operations-Expert",
+        agent_description="Operations and implementation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-    # Create mock majority voting
+    # Create majority voting for business decisions
     mv = MajorityVoting(
-        agents=[agent1, agent2, agent3],
-        concurrent=False,
-        multithreaded=False,
-        asynchronous=True,
+        name="Business-Decision-Consensus",
+        description="Majority voting system for business strategic decisions",
+        agents=[market_strategist, financial_analyst, technical_architect, risk_manager, operations_expert],
+        max_loops=2,
+        verbose=True,
     )
 
-    # Create mock conversation
-    conversation = MagicMock()
-    mv.conversation = conversation
+    # Test with complex business decision
+    result = mv.run(
+        "Should our company invest in developing an AI-powered customer service platform? "
+        "Consider market demand, financial implications, technical feasibility, risk factors, "
+        "and operational requirements."
+    )
 
-    # Create mock results
-    results = ["Paris", "Paris", "Lyon"]
+    assert result is not None
 
-    # Mock agent.run method
-    agent1.run.return_value = results[0]
-    agent2.run.return_value = results[1]
-    agent3.run.return_value = results[2]
 
-    # Run majority voting
-    majority_vote = await mv.run("What is the capital of France?")
+def test_majority_voting_error_handling():
+    """Test MajorityVoting error handling and validation"""
+    # Test with empty agents list
+    try:
+        mv = MajorityVoting(agents=[])
+        assert False, "Should have raised ValueError for empty agents list"
+    except ValueError as e:
+        assert "agents" in str(e).lower() or "empty" in str(e).lower()
 
-    # Assert agent.run method was called with the correct task
-    agent1.run.assert_called_once_with(
-        "What is the capital of France?"
+    # Test with invalid max_loops
+    analyst = Agent(
+        agent_name="Test-Analyst",
+        agent_description="Test analyst",
+        model_name="gpt-4o",
+        max_loops=1,
     )
-    agent2.run.assert_called_once_with(
-        "What is the capital of France?"
+
+    try:
+        mv = MajorityVoting(agents=[analyst], max_loops=0)
+        assert False, "Should have raised ValueError for invalid max_loops"
+    except ValueError as e:
+        assert "max_loops" in str(e).lower() or "0" in str(e)
+
+
+def test_majority_voting_different_output_types():
+    """Test MajorityVoting with different output types"""
+    # Create agents for technical analysis
+    security_expert = Agent(
+        agent_name="Security-Expert",
+        agent_description="Cybersecurity and data protection specialist",
+        model_name="gpt-4o",
+        max_loops=1,
     )
-    agent3.run.assert_called_once_with(
-        "What is the capital of France?"
+
+    compliance_officer = Agent(
+        agent_name="Compliance-Officer",
+        agent_description="Regulatory compliance and legal specialist",
+        model_name="gpt-4o",
+        max_loops=1,
     )
 
-    # Assert conversation.add method was called with the correct responses
-    conversation.add.assert_any_call(agent1.agent_name, results[0])
-    conversation.add.assert_any_call(agent2.agent_name, results[1])
-    conversation.add.assert_any_call(agent3.agent_name, results[2])
+    privacy_advocate = Agent(
+        agent_name="Privacy-Advocate",
+        agent_description="Privacy protection and data rights specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-    # Assert majority vote is correct
-    assert majority_vote is not None
+    # Test different output types
+    for output_type in ["dict", "string", "list"]:
+        mv = MajorityVoting(
+            name=f"Output-Type-Test-{output_type}",
+            description=f"Testing output type: {output_type}",
+            agents=[security_expert, compliance_officer, privacy_advocate],
+            max_loops=1,
+            output_type=output_type,
+        )
+
+        result = mv.run("What are the key considerations for implementing GDPR compliance in our data processing systems?")
+        assert result is not None
diff --git a/tests/structs/test_moa.py b/tests/structs/test_moa.py
index 453c7fd5..605432c8 100644
--- a/tests/structs/test_moa.py
+++ b/tests/structs/test_moa.py
@@ -1,84 +1,248 @@
 import pytest
-from unittest.mock import Mock, patch
 from swarms.structs.mixture_of_agents import MixtureOfAgents
 from swarms.structs.agent import Agent
-from swarms_memory import BaseVectorDatabase
-
-
-def test_init():
-    with patch.object(
-        MixtureOfAgents, "agent_check"
-    ) as mock_agent_check, patch.object(
-        MixtureOfAgents, "final_agent_check"
-    ) as mock_final_agent_check, patch.object(
-        MixtureOfAgents, "swarm_initialization"
-    ) as mock_swarm_initialization, patch.object(
-        MixtureOfAgents, "communication_protocol"
-    ) as mock_communication_protocol:
-        agents = [Mock(spec=Agent)]
-        final_agent = Mock(spec=Agent)
-        scp = Mock(spec=BaseVectorDatabase)
-        MixtureOfAgents(
-            agents=agents, final_agent=final_agent, scp=scp
-        )
-        mock_agent_check.assert_called_once()
-        mock_final_agent_check.assert_called_once()
-        mock_swarm_initialization.assert_called_once()
-        mock_communication_protocol.assert_called_once()
-
-
-def test_communication_protocol():
-    agents = [Mock(spec=Agent)]
-    final_agent = Mock(spec=Agent)
-    scp = Mock(spec=BaseVectorDatabase)
-    swarm = MixtureOfAgents(
-        agents=agents, final_agent=final_agent, scp=scp
-    )
-    swarm.communication_protocol()
-    for agent in agents:
-        agent.long_term_memory.assert_called_once_with(scp)
-
-
-def test_agent_check():
-    final_agent = Mock(spec=Agent)
-    with pytest.raises(TypeError):
-        MixtureOfAgents(agents="not a list", final_agent=final_agent)
-    with pytest.raises(TypeError):
-        MixtureOfAgents(
-            agents=["not an agent"], final_agent=final_agent
-        )
 
 
-def test_final_agent_check():
-    agents = [Mock(spec=Agent)]
-    with pytest.raises(TypeError):
-        MixtureOfAgents(agents=agents, final_agent="not an agent")
+def test_mixture_of_agents_basic_initialization():
+    """Test basic MixtureOfAgents initialization with multiple agents"""
+    # Create multiple specialized agents
+    research_agent = Agent(
+        agent_name="Research-Specialist",
+        agent_description="Specialist in research and data collection",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    analysis_agent = Agent(
+        agent_name="Analysis-Expert",
+        agent_description="Expert in data analysis and insights",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    strategy_agent = Agent(
+        agent_name="Strategy-Consultant",
+        agent_description="Strategy and planning consultant",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-def test_swarm_initialization():
-    with patch(
-        "swarms.structs.mixture_of_agents.logger"
-    ) as mock_logger:
-        agents = [Mock(spec=Agent)]
-        final_agent = Mock(spec=Agent)
-        swarm = MixtureOfAgents(
-            agents=agents, final_agent=final_agent
-        )
-        swarm.swarm_initialization()
-        assert mock_logger.info.call_count == 3
-
-
-def test_run():
-    with patch("swarms.structs.mixture_of_agents.logger"), patch(
-        "builtins.open", new_callable=Mock
-    ) as mock_open:
-        agents = [Mock(spec=Agent)]
-        final_agent = Mock(spec=Agent)
-        swarm = MixtureOfAgents(
-            agents=agents, final_agent=final_agent
+    # Create aggregator agent
+    aggregator = Agent(
+        agent_name="Aggregator-Agent",
+        agent_description="Agent that aggregates responses from other agents",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create mixture of agents
+    moa = MixtureOfAgents(
+        name="Business-Analysis-Mixture",
+        description="Mixture of agents for comprehensive business analysis",
+        agents=[research_agent, analysis_agent, strategy_agent],
+        aggregator_agent=aggregator,
+        layers=3,
+        max_loops=1,
+    )
+
+    # Verify initialization
+    assert moa.name == "Business-Analysis-Mixture"
+    assert moa.description == "Mixture of agents for comprehensive business analysis"
+    assert len(moa.agents) == 3
+    assert moa.aggregator_agent == aggregator
+    assert moa.layers == 3
+    assert moa.max_loops == 1
+
+
+def test_mixture_of_agents_execution():
+    """Test MixtureOfAgents execution with multiple agents"""
+    # Create diverse agents for different perspectives
+    market_analyst = Agent(
+        agent_name="Market-Analyst",
+        agent_description="Market analysis and trend specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    technical_expert = Agent(
+        agent_name="Technical-Expert",
+        agent_description="Technical feasibility and implementation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    financial_analyst = Agent(
+        agent_name="Financial-Analyst",
+        agent_description="Financial modeling and ROI specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    risk_assessor = Agent(
+        agent_name="Risk-Assessor",
+        agent_description="Risk assessment and mitigation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create aggregator for synthesis
+    aggregator = Agent(
+        agent_name="Executive-Summary-Agent",
+        agent_description="Executive summary and recommendation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create mixture of agents
+    moa = MixtureOfAgents(
+        name="Comprehensive-Evaluation-Mixture",
+        description="Mixture of agents for comprehensive business evaluation",
+        agents=[market_analyst, technical_expert, financial_analyst, risk_assessor],
+        aggregator_agent=aggregator,
+        layers=2,
+        max_loops=1,
+    )
+
+    # Test execution
+    result = moa.run("Evaluate the feasibility of launching an AI-powered healthcare platform")
+    assert result is not None
+
+
+def test_mixture_of_agents_multiple_layers():
+    """Test MixtureOfAgents with multiple layers"""
+    # Create agents for layered analysis
+    data_collector = Agent(
+        agent_name="Data-Collector",
+        agent_description="Data collection and research specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    pattern_analyzer = Agent(
+        agent_name="Pattern-Analyzer",
+        agent_description="Pattern recognition and analysis specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    insight_generator = Agent(
+        agent_name="Insight-Generator",
+        agent_description="Insight generation and interpretation specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create aggregator
+    final_aggregator = Agent(
+        agent_name="Final-Aggregator",
+        agent_description="Final aggregation and conclusion specialist",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create mixture with multiple layers for deeper analysis
+    moa = MixtureOfAgents(
+        name="Multi-Layer-Analysis-Mixture",
+        description="Mixture of agents with multiple analysis layers",
+        agents=[data_collector, pattern_analyzer, insight_generator],
+        aggregator_agent=final_aggregator,
+        layers=4,
+        max_loops=1,
+    )
+
+    # Test multi-layer execution
+    result = moa.run("Analyze customer behavior patterns and provide strategic insights")
+    assert result is not None
+
+
+def test_mixture_of_agents_error_handling():
+    """Test MixtureOfAgents error handling and validation"""
+    # Test with empty agents list
+    try:
+        moa = MixtureOfAgents(agents=[])
+        assert False, "Should have raised ValueError for empty agents list"
+    except ValueError as e:
+        assert "No agents provided" in str(e)
+
+    # Test with invalid aggregator system prompt
+    analyst = Agent(
+        agent_name="Test-Analyst",
+        agent_description="Test analyst",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    try:
+        moa = MixtureOfAgents(
+            agents=[analyst],
+            aggregator_system_prompt=""
         )
-        swarm.run("task")
-        for agent in agents:
-            agent.run.assert_called_once()
-        final_agent.run.assert_called_once()
-        mock_open.assert_called_once_with(swarm.saved_file_name, "w")
+        assert False, "Should have raised ValueError for empty system prompt"
+    except ValueError as e:
+        assert "No aggregator system prompt" in str(e)
+
+
+def test_mixture_of_agents_real_world_scenario():
+    """Test MixtureOfAgents in a realistic business scenario"""
+    # Create agents representing different business functions
+    marketing_director = Agent(
+        agent_name="Marketing-Director",
+        agent_description="Senior marketing director with market expertise",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    product_manager = Agent(
+        agent_name="Product-Manager",
+        agent_description="Product strategy and development manager",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    engineering_lead = Agent(
+        agent_name="Engineering-Lead",
+        agent_description="Senior engineering and technical architecture lead",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    sales_executive = Agent(
+        agent_name="Sales-Executive",
+        agent_description="Enterprise sales and customer relationship executive",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    legal_counsel = Agent(
+        agent_name="Legal-Counsel",
+        agent_description="Legal compliance and regulatory counsel",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create aggregator for executive decision making
+    executive_aggregator = Agent(
+        agent_name="Executive-Decision-Maker",
+        agent_description="Executive decision maker and strategic aggregator",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+
+    # Create comprehensive mixture of agents
+    moa = MixtureOfAgents(
+        name="Executive-Board-Mixture",
+        description="Mixture of agents representing executive board for strategic decisions",
+        agents=[marketing_director, product_manager, engineering_lead, sales_executive, legal_counsel],
+        aggregator_agent=executive_aggregator,
+        layers=3,
+        max_loops=1,
+    )
+
+    # Test with complex business scenario
+    result = moa.run(
+        "Develop a comprehensive go-to-market strategy for our new AI-powered enterprise platform. "
+        "Consider market positioning, technical requirements, competitive landscape, sales channels, "
+        "and legal compliance requirements."
+    )
+
+    assert result is not None
diff --git a/tests/structs/test_multi_agent_collab.py b/tests/structs/test_multi_agent_collab.py
deleted file mode 100644
index 6e97b479..00000000
--- a/tests/structs/test_multi_agent_collab.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import json
-import os
-from unittest.mock import Mock
-
-import pytest
-
-from swarms import Agent
-from swarm_models import OpenAIChat
-from experimental.multi_agent_collab import MultiAgentCollaboration
-
-# Initialize the director agent
-
-director = Agent(
-    agent_name="Director",
-    system_prompt="Directs the tasks for the workers",
-    llm=OpenAIChat(),
-    max_loops=1,
-    dashboard=False,
-    streaming_on=True,
-    verbose=True,
-    stopping_token="<DONE>",
-    state_save_file_type="json",
-    saved_state_path="director.json",
-)
-
-
-# Initialize worker 1
-
-worker1 = Agent(
-    agent_name="Worker1",
-    system_prompt="Generates a transcript for a youtube video on what swarms are",
-    llm=OpenAIChat(),
-    max_loops=1,
-    dashboard=False,
-    streaming_on=True,
-    verbose=True,
-    stopping_token="<DONE>",
-    state_save_file_type="json",
-    saved_state_path="worker1.json",
-)
-
-
-# Initialize worker 2
-worker2 = Agent(
-    agent_name="Worker2",
-    system_prompt="Summarizes the transcript generated by Worker1",
-    llm=OpenAIChat(),
-    max_loops=1,
-    dashboard=False,
-    streaming_on=True,
-    verbose=True,
-    stopping_token="<DONE>",
-    state_save_file_type="json",
-    saved_state_path="worker2.json",
-)
-
-
-# Create a list of agents
-agents = [director, worker1, worker2]
-
-
-@pytest.fixture
-def collaboration():
-    return MultiAgentCollaboration(agents)
-
-
-def test_collaboration_initialization(collaboration):
-    assert len(collaboration.agents) == 2
-    assert callable(collaboration.select_next_speaker)
-    assert collaboration.max_loops == 10
-    assert collaboration.results == []
-    assert collaboration.logging is True
-
-
-def test_reset(collaboration):
-    collaboration.reset()
-    for agent in collaboration.agents:
-        assert agent.step == 0
-
-
-def test_inject(collaboration):
-    collaboration.inject("TestName", "TestMessage")
-    for agent in collaboration.agents:
-        assert "TestName" in agent.history[-1]
-        assert "TestMessage" in agent.history[-1]
-
-
-def test_inject_agent(collaboration):
-    agent3 = Agent(llm=OpenAIChat(), max_loops=2)
-    collaboration.inject_agent(agent3)
-    assert len(collaboration.agents) == 3
-    assert agent3 in collaboration.agents
-
-
-def test_step(collaboration):
-    collaboration.step()
-    for agent in collaboration.agents:
-        assert agent.step == 1
-
-
-def test_ask_for_bid(collaboration):
-    agent = Mock()
-    agent.bid.return_value = "<5>"
-    bid = collaboration.ask_for_bid(agent)
-    assert bid == 5
-
-
-def test_select_next_speaker(collaboration):
-    collaboration.select_next_speaker = Mock(return_value=0)
-    idx = collaboration.select_next_speaker(1, collaboration.agents)
-    assert idx == 0
-
-
-def test_run(collaboration):
-    collaboration.run()
-    for agent in collaboration.agents:
-        assert agent.step == collaboration.max_loops
-
-
-def test_format_results(collaboration):
-    collaboration.results = [
-        {"agent": "Agent1", "response": "Response1"}
-    ]
-    formatted_results = collaboration.format_results(
-        collaboration.results
-    )
-    assert "Agent1 responded: Response1" in formatted_results
-
-
-def test_save_and_load(collaboration):
-    collaboration.save()
-    loaded_state = collaboration.load()
-    assert loaded_state["_step"] == collaboration._step
-    assert loaded_state["results"] == collaboration.results
-
-
-def test_performance(collaboration):
-    performance_data = collaboration.performance()
-    for agent in collaboration.agents:
-        assert agent.name in performance_data
-        assert "metrics" in performance_data[agent.name]
-
-
-def test_set_interaction_rules(collaboration):
-    rules = {"rule1": "action1", "rule2": "action2"}
-    collaboration.set_interaction_rules(rules)
-    assert hasattr(collaboration, "interaction_rules")
-    assert collaboration.interaction_rules == rules
-
-
-def test_repr(collaboration):
-    repr_str = repr(collaboration)
-    assert isinstance(repr_str, str)
-    assert "MultiAgentCollaboration" in repr_str
-
-
-def test_load(collaboration):
-    state = {
-        "step": 5,
-        "results": [{"agent": "Agent1", "response": "Response1"}],
-    }
-    with open(collaboration.saved_file_path_name, "w") as file:
-        json.dump(state, file)
-
-    loaded_state = collaboration.load()
-    assert loaded_state["_step"] == state["step"]
-    assert loaded_state["results"] == state["results"]
-
-
-def test_save(collaboration, tmp_path):
-    collaboration.saved_file_path_name = tmp_path / "test_save.json"
-    collaboration.save()
-
-    with open(collaboration.saved_file_path_name) as file:
-        saved_data = json.load(file)
-
-    assert saved_data["_step"] == collaboration._step
-    assert saved_data["results"] == collaboration.results
-
-
-# Add more tests here...
-
-# Add more parameterized tests for different scenarios...
-
-
-# Example of exception testing
-def test_exception_handling(collaboration):
-    agent = Mock()
-    agent.bid.side_effect = ValueError("Invalid bid")
-    with pytest.raises(ValueError):
-        collaboration.ask_for_bid(agent)
-
-
-# Add more exception testing...
-
-
-# Example of environment variable testing (if applicable)
-@pytest.mark.parametrize("env_var", ["ENV_VAR_1", "ENV_VAR_2"])
-def test_environment_variables(collaboration, monkeypatch, env_var):
-    monkeypatch.setenv(env_var, "test_value")
-    assert os.getenv(env_var) == "test_value"
diff --git a/tests/structs/test_recursive_workflow.py b/tests/structs/test_recursive_workflow.py
deleted file mode 100644
index 75cd5145..00000000
--- a/tests/structs/test_recursive_workflow.py
+++ /dev/null
@@ -1,74 +0,0 @@
-from unittest.mock import Mock, create_autospec
-
-import pytest
-
-from swarm_models import OpenAIChat
-from swarms.structs import RecursiveWorkflow, Task
-
-
-def test_add():
-    workflow = RecursiveWorkflow(stop_token="<DONE>")
-    task = Mock(spec=Task)
-    workflow.add(task)
-    assert task in workflow.tasks
-
-
-def test_run():
-    workflow = RecursiveWorkflow(stop_token="<DONE>")
-    agent1 = create_autospec(OpenAIChat)
-    agent2 = create_autospec(OpenAIChat)
-    task1 = Task("What's the weather in miami", agent1)
-    task2 = Task("What's the weather in miami", agent2)
-    workflow.add(task1)
-    workflow.add(task2)
-
-    agent1.execute.return_value = "Not done"
-    agent2.execute.return_value = "<DONE>"
-
-    workflow.run()
-
-    assert agent1.execute.call_count >= 1
-    assert agent2.execute.call_count == 1
-
-
-def test_run_no_tasks():
-    workflow = RecursiveWorkflow(stop_token="<DONE>")
-    # No tasks are added to the workflow
-    # This should not raise any errors
-    workflow.run()
-
-
-def test_run_stop_token_not_in_result():
-    workflow = RecursiveWorkflow(stop_token="<DONE>")
-    agent = create_autospec(OpenAIChat)
-    task = Task("What's the weather in miami", agent)
-    workflow.add(task)
-
-    agent.execute.return_value = "Not done"
-
-    # If the stop token is never found in the result, the workflow could run forever.
-    # To prevent this, we'll set a maximum number of iterations.
-    max_iterations = 1000
-    for _ in range(max_iterations):
-        try:
-            workflow.run()
-        except RecursionError:
-            pytest.fail(
-                "RecursiveWorkflow.run caused a RecursionError"
-            )
-
-    assert agent.execute.call_count == max_iterations
-
-
-def test_run_stop_token_in_result():
-    workflow = RecursiveWorkflow(stop_token="<DONE>")
-    agent = create_autospec(OpenAIChat)
-    task = Task("What's the weather in miami", agent)
-    workflow.add(task)
-
-    agent.execute.return_value = "<DONE>"
-
-    workflow.run()
-
-    # If the stop token is found in the result, the workflow should stop running the task.
-    assert agent.execute.call_count == 1
diff --git a/tests/structs/test_sequential_workflow.py b/tests/structs/test_sequential_workflow.py
index 1327d0ae..15a7ab16 100644
--- a/tests/structs/test_sequential_workflow.py
+++ b/tests/structs/test_sequential_workflow.py
@@ -1,65 +1,7 @@
-import asyncio
-import os
-from unittest.mock import patch
-
 import pytest
 
-from swarm_models import OpenAIChat
-from swarms.structs.agent import Agent
-from swarms.structs.sequential_workflow import (
-    SequentialWorkflow,
-    Task,
-)
-
-# Mock the OpenAI API key using environment variables
-os.environ["OPENAI_API_KEY"] = "mocked_api_key"
-
-
-# Mock OpenAIChat class for testing
-class MockOpenAIChat:
-    def __init__(self, *args, **kwargs):
-        pass
-
-    def run(self, *args, **kwargs):
-        return "Mocked result"
-
-
-# Mock Agent class for testing
-class MockAgent:
-    def __init__(self, *args, **kwargs):
-        pass
-
-    def run(self, *args, **kwargs):
-        return "Mocked result"
-
-
-# Mock SequentialWorkflow class for testing
-class MockSequentialWorkflow:
-    def __init__(self, *args, **kwargs):
-        pass
+from swarms import Agent, SequentialWorkflow
 
-    def add(self, *args, **kwargs):
-        pass
-
-    def run(self):
-        pass
-
-
-# Test Task class
-def test_task_initialization():
-    description = "Sample Task"
-    agent = MockOpenAIChat()
-    task = Task(description=description, agent=agent)
-    assert task.description == description
-    assert task.agent == agent
-
-
-def test_task_execute():
-    description = "Sample Task"
-    agent = MockOpenAIChat()
-    task = Task(description=description, agent=agent)
-    task.run()
-    assert task.result == "Mocked result"
 
 
 # Test SequentialWorkflow class
@@ -77,263 +19,289 @@ def test_sequential_workflow_initialization():
     assert workflow.dashboard is False
 
 
-def test_sequential_workflow_add_task():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockOpenAIChat()
-    workflow.add(task_description, task_flow)
-    assert len(workflow.tasks) == 1
-    assert workflow.tasks[0].description == task_description
-    assert workflow.tasks[0].agent == task_flow
-
-
-def test_sequential_workflow_reset_workflow():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockOpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.reset_workflow()
-    assert workflow.tasks[0].result is None
-
-
-def test_sequential_workflow_get_task_results():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockOpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.run()
-    results = workflow.get_task_results()
-    assert len(results) == 1
-    assert task_description in results
-    assert results[task_description] == "Mocked result"
-
-
-def test_sequential_workflow_remove_task():
-    workflow = SequentialWorkflow()
-    task1_description = "Task 1"
-    task2_description = "Task 2"
-    task1_flow = MockOpenAIChat()
-    task2_flow = MockOpenAIChat()
-    workflow.add(task1_description, task1_flow)
-    workflow.add(task2_description, task2_flow)
-    workflow.remove_task(task1_description)
-    assert len(workflow.tasks) == 1
-    assert workflow.tasks[0].description == task2_description
-
-
-def test_sequential_workflow_update_task():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockOpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.update_task(task_description, max_tokens=1000)
-    assert workflow.tasks[0].kwargs["max_tokens"] == 1000
-
-
-def test_sequential_workflow_save_workflow_state():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockOpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.save_workflow_state("test_state.json")
-    assert os.path.exists("test_state.json")
-    os.remove("test_state.json")
-
+def test_sequential_workflow_initialization_with_agents():
+    """Test SequentialWorkflow initialization with agents"""
+    agent1 = Agent(
+        agent_name="Agent-1",
+        agent_description="First test agent",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Agent-2",
+        agent_description="Second test agent",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-def test_sequential_workflow_load_workflow_state():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockOpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.save_workflow_state("test_state.json")
-    workflow.load_workflow_state("test_state.json")
-    assert len(workflow.tasks) == 1
-    assert workflow.tasks[0].description == task_description
-    os.remove("test_state.json")
+    workflow = SequentialWorkflow(
+        name="Test-Workflow",
+        description="Test workflow with multiple agents",
+        agents=[agent1, agent2],
+        max_loops=1,
+    )
 
+    assert isinstance(workflow, SequentialWorkflow)
+    assert workflow.name == "Test-Workflow"
+    assert workflow.description == "Test workflow with multiple agents"
+    assert len(workflow.agents) == 2
+    assert workflow.agents[0] == agent1
+    assert workflow.agents[1] == agent2
+    assert workflow.max_loops == 1
 
-def test_sequential_workflow_run():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockOpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.run()
-    assert workflow.tasks[0].result == "Mocked result"
 
+def test_sequential_workflow_multi_agent_execution():
+    """Test SequentialWorkflow execution with multiple agents"""
+    agent1 = Agent(
+        agent_name="Research-Agent",
+        agent_description="Agent for research tasks",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Analysis-Agent",
+        agent_description="Agent for analyzing research results",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent3 = Agent(
+        agent_name="Summary-Agent",
+        agent_description="Agent for summarizing findings",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-def test_sequential_workflow_workflow_bootup(capfd):
-    workflow = SequentialWorkflow()
-    workflow.workflow_bootup()
-    out, _ = capfd.readouterr()
-    assert "Sequential Workflow Initializing..." in out
+    workflow = SequentialWorkflow(
+        name="Multi-Agent-Research-Workflow",
+        description="Workflow for comprehensive research, analysis, and summarization",
+        agents=[agent1, agent2, agent3],
+        max_loops=1,
+    )
 
+    # Test that the workflow executes successfully
+    result = workflow.run("Analyze the impact of renewable energy on climate change")
+    assert result is not None
+    # SequentialWorkflow may return different types based on output_type, just ensure it's not None
 
-def test_sequential_workflow_workflow_dashboard(capfd):
-    workflow = SequentialWorkflow()
-    workflow.workflow_dashboard()
-    out, _ = capfd.readouterr()
-    assert "Sequential Workflow Dashboard" in out
 
+def test_sequential_workflow_batched_execution():
+    """Test batched execution of SequentialWorkflow"""
+    agent1 = Agent(
+        agent_name="Data-Collector",
+        agent_description="Agent for collecting data",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Data-Processor",
+        agent_description="Agent for processing collected data",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
-# Mock Agent class for async testing
-class MockAsyncAgent:
-    def __init__(self, *args, **kwargs):
-        pass
+    workflow = SequentialWorkflow(
+        name="Batched-Processing-Workflow",
+        agents=[agent1, agent2],
+        max_loops=1,
+    )
 
-    async def arun(self, *args, **kwargs):
-        return "Mocked result"
+    # Test batched execution
+    tasks = [
+        "Analyze solar energy trends",
+        "Evaluate wind power efficiency",
+        "Compare renewable energy sources"
+    ]
+    results = workflow.run_batched(tasks)
+    assert results is not None
+    # run_batched returns a list of results
+    assert isinstance(results, list)
+    assert len(results) == 3
 
 
-# Test async execution in SequentialWorkflow
 @pytest.mark.asyncio
-async def test_sequential_workflow_arun():
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = MockAsyncAgent()
-    workflow.add(task_description, task_flow)
-    await workflow.arun()
-    assert workflow.tasks[0].result == "Mocked result"
-
-
-def test_real_world_usage_with_openai_key():
-    # Initialize the language model
-    llm = OpenAIChat()
-    assert isinstance(llm, OpenAIChat)
+async def test_sequential_workflow_async_execution():
+    """Test async execution of SequentialWorkflow"""
+    agent1 = Agent(
+        agent_name="Async-Research-Agent",
+        agent_description="Agent for async research tasks",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Async-Analysis-Agent",
+        agent_description="Agent for async analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    workflow = SequentialWorkflow(
+        name="Async-Workflow",
+        agents=[agent1, agent2],
+        max_loops=1,
+    )
 
-def test_real_world_usage_with_flow_and_openai_key():
-    # Initialize a agent with the language model
-    agent = Agent(llm=OpenAIChat())
-    assert isinstance(agent, Agent)
+    # Test async execution
+    result = await workflow.run_async("Analyze AI trends in 2024")
+    assert result is not None
 
 
-def test_real_world_usage_with_sequential_workflow():
-    # Initialize a sequential workflow
-    workflow = SequentialWorkflow()
-    assert isinstance(workflow, SequentialWorkflow)
+@pytest.mark.asyncio
+async def test_sequential_workflow_concurrent_execution():
+    """Test concurrent execution of SequentialWorkflow"""
+    agent1 = Agent(
+        agent_name="Concurrent-Research-Agent",
+        agent_description="Agent for concurrent research",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Concurrent-Analysis-Agent",
+        agent_description="Agent for concurrent analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent3 = Agent(
+        agent_name="Concurrent-Summary-Agent",
+        agent_description="Agent for concurrent summarization",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    workflow = SequentialWorkflow(
+        name="Concurrent-Workflow",
+        agents=[agent1, agent2, agent3],
+        max_loops=1,
+    )
 
-def test_real_world_usage_add_tasks():
-    # Create a sequential workflow and add tasks
-    workflow = SequentialWorkflow()
-    task1_description = "Task 1"
-    task2_description = "Task 2"
-    task1_flow = OpenAIChat()
-    task2_flow = OpenAIChat()
-    workflow.add(task1_description, task1_flow)
-    workflow.add(task2_description, task2_flow)
-    assert len(workflow.tasks) == 2
-    assert workflow.tasks[0].description == task1_description
-    assert workflow.tasks[1].description == task2_description
-
-
-def test_real_world_usage_run_workflow():
-    # Create a sequential workflow, add a task, and run the workflow
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = OpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.run()
-    assert workflow.tasks[0].result is not None
+    # Test concurrent execution
+    tasks = [
+        "Research quantum computing advances",
+        "Analyze blockchain technology trends",
+        "Evaluate machine learning applications"
+    ]
+    results = await workflow.run_concurrent(tasks)
+    assert results is not None
+    # run_concurrent returns a list of results
+    assert isinstance(results, list)
+    assert len(results) == 3
 
 
-def test_real_world_usage_dashboard_display():
-    # Create a sequential workflow, add tasks, and display the dashboard
-    workflow = SequentialWorkflow()
-    task1_description = "Task 1"
-    task2_description = "Task 2"
-    task1_flow = OpenAIChat()
-    task2_flow = OpenAIChat()
-    workflow.add(task1_description, task1_flow)
-    workflow.add(task2_description, task2_flow)
-    with patch("builtins.print") as mock_print:
-        workflow.workflow_dashboard()
-        mock_print.assert_called()
-
-
-def test_real_world_usage_async_execution():
-    # Create a sequential workflow, add an async task, and run the workflow asynchronously
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    async_task_flow = OpenAIChat()
 
-    async def async_run_workflow():
-        await workflow.arun()
 
-    workflow.add(task_description, async_task_flow)
-    asyncio.run(async_run_workflow())
-    assert workflow.tasks[0].result is not None
+def test_sequential_workflow_with_multi_agent_collaboration():
+    """Test SequentialWorkflow with multi-agent collaboration prompts"""
+    agent1 = Agent(
+        agent_name="Market-Research-Agent",
+        agent_description="Agent for market research",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Competitive-Analysis-Agent",
+        agent_description="Agent for competitive analysis",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent3 = Agent(
+        agent_name="Strategy-Development-Agent",
+        agent_description="Agent for developing business strategies",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    workflow = SequentialWorkflow(
+        name="Business-Strategy-Workflow",
+        description="Comprehensive business strategy development workflow",
+        agents=[agent1, agent2, agent3],
+        max_loops=1,
+        multi_agent_collab_prompt=True,
+    )
 
-def test_real_world_usage_multiple_loops():
-    # Create a sequential workflow with multiple loops, add a task, and run the workflow
-    workflow = SequentialWorkflow(max_loops=3)
-    task_description = "Sample Task"
-    task_flow = OpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.run()
-    assert workflow.tasks[0].result is not None
+    # Test that collaboration prompt is added
+    assert agent1.system_prompt is not None
+    assert agent2.system_prompt is not None
+    assert agent3.system_prompt is not None
+
+    # Test execution
+    result = workflow.run("Develop a business strategy for entering the AI market")
+    assert result is not None
+
+
+def test_sequential_workflow_error_handling():
+    """Test SequentialWorkflow error handling"""
+    # Test with invalid agents list
+    with pytest.raises(ValueError, match="Agents list cannot be None or empty"):
+        SequentialWorkflow(agents=None)
+
+    with pytest.raises(ValueError, match="Agents list cannot be None or empty"):
+        SequentialWorkflow(agents=[])
+
+    # Test with zero max_loops
+    with pytest.raises(ValueError, match="max_loops cannot be 0"):
+        agent1 = Agent(
+            agent_name="Test-Agent",
+            agent_description="Test agent",
+            model_name="gpt-4o",
+            max_loops=1,
+        )
+        SequentialWorkflow(agents=[agent1], max_loops=0)
+
+
+def test_sequential_workflow_agent_names_extraction():
+    """Test that SequentialWorkflow properly extracts agent names for flow"""
+    agent1 = Agent(
+        agent_name="Alpha-Agent",
+        agent_description="First agent",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Beta-Agent",
+        agent_description="Second agent",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent3 = Agent(
+        agent_name="Gamma-Agent",
+        agent_description="Third agent",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    workflow = SequentialWorkflow(
+        name="Test-Flow-Workflow",
+        agents=[agent1, agent2, agent3],
+        max_loops=1,
+    )
 
-def test_real_world_usage_autosave_state():
-    # Create a sequential workflow with autosave, add a task, run the workflow, and check if state is saved
-    workflow = SequentialWorkflow(autosave=True)
-    task_description = "Sample Task"
-    task_flow = OpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.run()
-    assert workflow.tasks[0].result is not None
-    assert os.path.exists("sequential_workflow_state.json")
-    os.remove("sequential_workflow_state.json")
+    # Test flow string generation
+    expected_flow = "Alpha-Agent -> Beta-Agent -> Gamma-Agent"
+    assert workflow.flow == expected_flow
 
 
-def test_real_world_usage_load_state():
-    # Create a sequential workflow, add a task, save state, load state, and run the workflow
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = OpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.run()
-    workflow.save_workflow_state("test_state.json")
-    workflow.load_workflow_state("test_state.json")
-    workflow.run()
-    assert workflow.tasks[0].result is not None
-    os.remove("test_state.json")
-
-
-def test_real_world_usage_update_task_args():
-    # Create a sequential workflow, add a task, and update task arguments
-    workflow = SequentialWorkflow()
-    task_description = "Sample Task"
-    task_flow = OpenAIChat()
-    workflow.add(task_description, task_flow)
-    workflow.update_task(task_description, max_tokens=1000)
-    assert workflow.tasks[0].kwargs["max_tokens"] == 1000
+def test_sequential_workflow_team_awareness():
+    """Test SequentialWorkflow with team awareness enabled"""
+    agent1 = Agent(
+        agent_name="Team-Member-1",
+        agent_description="First team member",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
+    agent2 = Agent(
+        agent_name="Team-Member-2",
+        agent_description="Second team member",
+        model_name="gpt-4o",
+        max_loops=1,
+    )
 
+    workflow = SequentialWorkflow(
+        name="Team-Aware-Workflow",
+        description="Workflow with team awareness",
+        agents=[agent1, agent2],
+        max_loops=1,
+        team_awareness=True,
+    )
 
-def test_real_world_usage_remove_task():
-    # Create a sequential workflow, add tasks, remove a task, and run the workflow
-    workflow = SequentialWorkflow()
-    task1_description = "Task 1"
-    task2_description = "Task 2"
-    task1_flow = OpenAIChat()
-    task2_flow = OpenAIChat()
-    workflow.add(task1_description, task1_flow)
-    workflow.add(task2_description, task2_flow)
-    workflow.remove_task(task1_description)
-    workflow.run()
-    assert len(workflow.tasks) == 1
-    assert workflow.tasks[0].description == task2_description
-
-
-def test_real_world_usage_with_environment_variables():
-    # Ensure that the OpenAI API key is set using environment variables
-    assert "OPENAI_API_KEY" in os.environ
-    assert os.environ["OPENAI_API_KEY"] == "mocked_api_key"
-    del os.environ["OPENAI_API_KEY"]  # Clean up after the test
-
-
-def test_real_world_usage_no_openai_key():
-    # Ensure that an exception is raised when the OpenAI API key is not set
-    with pytest.raises(ValueError):
-        OpenAIChat()  # API key not provided, should raise an exception
+    # Test that workflow initializes successfully with team awareness
+    assert workflow.team_awareness is True
+    assert len(workflow.agents) == 2
diff --git a/tests/utils/test_display_markdown_message.py b/tests/utils/test_display_markdown_message.py
deleted file mode 100644
index 1b7cadaa..00000000
--- a/tests/utils/test_display_markdown_message.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# import necessary modules
-from unittest import mock
-
-import pytest
-from rich.console import Console
-from rich.markdown import Markdown
-from rich.rule import Rule
-
-from swarms.utils import display_markdown_message
-
-
-def test_basic_message():
-    # Test basic message functionality
-    with mock.patch.object(Console, "print") as mock_print:
-        display_markdown_message("This is a test")
-        mock_print.assert_called_once_with(
-            Markdown("This is a test", style="cyan")
-        )
-
-
-def test_empty_message():
-    # Test how function handles empty input
-    with mock.patch.object(Console, "print") as mock_print:
-        display_markdown_message("")
-        mock_print.assert_called_once_with("")
-
-
-@pytest.mark.parametrize("color", ["cyan", "red", "blue"])
-def test_colors(color):
-    # Test different colors
-    with mock.patch.object(Console, "print") as mock_print:
-        display_markdown_message("This is a test", color)
-        mock_print.assert_called_once_with(
-            Markdown("This is a test", style=color)
-        )
-
-
-def test_dash_line():
-    # Test how function handles "---"
-    with mock.patch.object(Console, "print") as mock_print:
-        display_markdown_message("---")
-        mock_print.assert_called_once_with(Rule(style="cyan"))
-
-
-def test_message_with_whitespace():
-    # Test how function handles message with whitespaces
-    with mock.patch.object(Console, "print") as mock_print:
-        display_markdown_message(" \n Test \n --- \n Test \n")
-        calls = [
-            mock.call(""),
-            mock.call(Markdown("Test", style="cyan")),
-            mock.call(Rule(style="cyan")),
-            mock.call(Markdown("Test", style="cyan")),
-            mock.call(""),
-        ]
-        mock_print.assert_has_calls(calls)
-
-
-def test_message_start_with_greater_than():
-    # Test how function handles message line starting with ">"
-    with mock.patch.object(Console, "print") as mock_print:
-        display_markdown_message(">This is a test")
-        calls = [
-            mock.call(Markdown(">This is a test", style="cyan")),
-            mock.call(""),
-        ]
-        mock_print.assert_has_calls(calls)
diff --git a/tests/utils/test_docstring_parser.py b/tests/utils/test_docstring_parser.py
index 2f1f2114..956d0927 100644
--- a/tests/utils/test_docstring_parser.py
+++ b/tests/utils/test_docstring_parser.py
@@ -1,14 +1,8 @@
-"""
-Test suite for the custom docstring parser implementation.
-
-This module contains comprehensive tests to ensure the docstring parser
-works correctly with various docstring formats and edge cases.
-"""
-
 import pytest
+
 from swarms.utils.docstring_parser import (
-    parse,
     DocstringParam,
+    parse,
 )
 
 
diff --git a/tests/agent/agents/test_litellm_args_kwargs.py b/tests/utils/test_litellm_args_kwargs.py
similarity index 100%
rename from tests/agent/agents/test_litellm_args_kwargs.py
rename to tests/utils/test_litellm_args_kwargs.py
diff --git a/tests/utils/test_math_eval.py b/tests/utils/test_math_eval.py
deleted file mode 100644
index 642865b6..00000000
--- a/tests/utils/test_math_eval.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from swarms.utils.math_eval import math_eval
-
-
-def func1_no_exception(x):
-    return x + 2
-
-
-def func2_no_exception(x):
-    return x + 2
-
-
-def func1_with_exception(x):
-    raise ValueError()
-
-
-def func2_with_exception(x):
-    raise ValueError()
-
-
-def test_same_results_no_exception(caplog):
-    @math_eval(func1_no_exception, func2_no_exception)
-    def test_func(x):
-        return x
-
-    result1, result2 = test_func(5)
-    assert result1 == result2 == 7
-    assert "Outputs do not match" not in caplog.text
-
-
-def test_func1_exception(caplog):
-    @math_eval(func1_with_exception, func2_no_exception)
-    def test_func(x):
-        return x
-
-    result1, result2 = test_func(5)
-    assert result1 is None
-    assert result2 == 7
-    assert "Error in func1:" in caplog.text
-
-
-# similar tests for func2_with_exception and when func1 and func2 return different results
diff --git a/tests/utils/test_metrics_decorator.py b/tests/utils/test_metrics_decorator.py
deleted file mode 100644
index 8c3a8af9..00000000
--- a/tests/utils/test_metrics_decorator.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# pytest imports
-import time
-from unittest.mock import Mock
-
-import pytest
-
-# Imports from your project
-from swarms.utils import metrics_decorator
-
-
-# Basic successful test
-def test_metrics_decorator_success():
-    @metrics_decorator
-    def decorated_func():
-        time.sleep(0.1)
-        return [1, 2, 3, 4, 5]
-
-    metrics = decorated_func()
-    assert "Time to First Token" in metrics
-    assert "Generation Latency" in metrics
-    assert "Throughput:" in metrics
-
-
-@pytest.mark.parametrize(
-    "wait_time, return_val",
-    [
-        (0, []),
-        (0.1, [1, 2, 3]),
-        (0.5, list(range(50))),
-    ],
-)
-def test_metrics_decorator_with_various_wait_times_and_return_vals(
-    wait_time, return_val
-):
-    @metrics_decorator
-    def decorated_func():
-        time.sleep(wait_time)
-        return return_val
-
-    metrics = decorated_func()
-    assert "Time to First Token" in metrics
-    assert "Generation Latency" in metrics
-    assert "Throughput:" in metrics
-
-
-# Test to ensure that mocked time function was called and throughputs are calculated as expected
-def test_metrics_decorator_with_mocked_time(mocker):
-    mocked_time = Mock()
-    mocker.patch("time.time", mocked_time)
-
-    mocked_time.side_effect = [0, 5, 10, 20]
-
-    @metrics_decorator
-    def decorated_func():
-        return ["tok_1", "tok_2"]
-
-    metrics = decorated_func()
-    assert (
-        metrics
-        == """
-    Time to First Token: 5
-    Generation Latency: 20
-    Throughput: 0.1
-    """
-    )
-    mocked_time.assert_any_call()
-
-
-# Test to ensure that exceptions in the decorated function are propagated
-def test_metrics_decorator_raises_exception():
-    @metrics_decorator
-    def decorated_func():
-        raise ValueError("Oops!")
-
-    with pytest.raises(ValueError, match="Oops!"):
-        decorated_func()
-
-
-# Test to ensure proper handling when decorated function returns non-list value
-def test_metrics_decorator_with_non_list_return_val():
-    @metrics_decorator
-    def decorated_func():
-        return "Hello, world!"
-
-    metrics = decorated_func()
-    assert "Time to First Token" in metrics
-    assert "Generation Latency" in metrics
-    assert "Throughput:" in metrics