Merge branch 'master' into mvstream

pull/1147/head
Kye Gomez 2 weeks ago committed by GitHub
commit e839dc5206

@ -0,0 +1,150 @@
name: Test Main Features
on:
push:
paths:
- 'tests/test_main_features.py'
- 'swarms/**'
- 'requirements.txt'
- 'pyproject.toml'
branches: [ "master" ]
pull_request:
paths:
- 'tests/test_main_features.py'
- 'swarms/**'
- 'requirements.txt'
- 'pyproject.toml'
branches: [ "master" ]
workflow_dispatch: # Allow manual triggering
jobs:
test-main-features:
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout code
uses: actions/checkout@v5
- name: Set up Python 3.10
uses: actions/setup-python@v6
with:
python-version: "3.10"
- name: Cache pip dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Configure Poetry
run: |
poetry config virtualenvs.create true
poetry config virtualenvs.in-project true
- name: Install dependencies
run: |
poetry install --with test
- name: Set up environment variables
run: |
echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> $GITHUB_ENV
echo "ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }}" >> $GITHUB_ENV
echo "GOOGLE_API_KEY=${{ secrets.GOOGLE_API_KEY }}" >> $GITHUB_ENV
echo "COHERE_API_KEY=${{ secrets.COHERE_API_KEY }}" >> $GITHUB_ENV
echo "HUGGINGFACE_API_KEY=${{ secrets.HUGGINGFACE_API_KEY }}" >> $GITHUB_ENV
echo "REPLICATE_API_KEY=${{ secrets.REPLICATE_API_KEY }}" >> $GITHUB_ENV
echo "TOGETHER_API_KEY=${{ secrets.TOGETHER_API_KEY }}" >> $GITHUB_ENV
- name: Run Main Features Tests
run: |
poetry run python tests/test_main_features.py
- name: Upload test results
uses: actions/upload-artifact@v4
if: always()
with:
name: test-results
path: test_runs/
retention-days: 7
- name: Comment on PR with test results
if: github.event_name == 'pull_request' && always()
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
const path = require('path');
try {
// Look for test result files
const testRunsDir = 'test_runs';
if (fs.existsSync(testRunsDir)) {
const files = fs.readdirSync(testRunsDir);
const latestReport = files
.filter(f => f.endsWith('.md'))
.sort()
.pop();
if (latestReport) {
const reportPath = path.join(testRunsDir, latestReport);
const reportContent = fs.readFileSync(reportPath, 'utf8');
// Extract summary from markdown
const summaryMatch = reportContent.match(/## Summary\n\n(.*?)\n\n## Detailed Results/s);
const summary = summaryMatch ? summaryMatch[1] : 'Test results available in artifacts';
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: `## Main Features Test Results\n\n${summary}\n\n📊 Full test report available in artifacts.`
});
}
}
} catch (error) {
console.log('Could not read test results:', error.message);
}
test-coverage:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
needs: test-main-features
steps:
- name: Checkout code
uses: actions/checkout@v5
- name: Set up Python 3.10
uses: actions/setup-python@v6
with:
python-version: "3.10"
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Install dependencies
run: |
poetry install --with test
- name: Run coverage analysis
run: |
poetry run pytest tests/test_main_features.py --cov=swarms --cov-report=xml --cov-report=html
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
files: ./coverage.xml
flags: main-features
name: main-features-coverage
fail_ci_if_error: false

@ -0,0 +1,23 @@
from swarms import Agent
from swarms.prompts.finance_agent_sys_prompt import (
FINANCIAL_AGENT_SYS_PROMPT,
)
agent = Agent(
agent_name="Financial-Analysis-Agent", # Name of the agent
agent_description="Personal finance advisor agent", # Description of the agent's role
system_prompt=FINANCIAL_AGENT_SYS_PROMPT, # System prompt for financial tasks
max_loops=1,
mcp_urls=[
"http://0.0.0.0:5932/mcp",
],
model_name="gpt-4o-mini",
output_type="all",
)
out = agent.run(
"Use the discover agent tools to find what agents are available and provide a summary"
)
# Print the output from the agent's run method.
print(out)

@ -242,12 +242,20 @@ This index organizes **100+ production-ready examples** from our [Swarms Example
| Business | [Business Strategy](https://github.com/The-Swarm-Corporation/swarms-examples/blob/main/examples/applications/business_strategy/business_strategy_graph/growth_agent.py) | Strategic planning and business development swarm |
| Research | [Astronomy Research](https://github.com/The-Swarm-Corporation/swarms-examples/blob/main/examples/applications/astronomy/multiversal_detection/test.py) | Collaborative space research and astronomical analysis |
## Additional Resources

- [GitHub](https://github.com/kyegomez/swarms)
- [Discord](https://t.co/zlLe07AqUX)
- [Telegram](https://t.co/dSRy143zQv)
- [X Community](https://x.com/i/communities/1875452887414804745)

## Connect With Us

Join our community of agent engineers and researchers for technical support, cutting-edge updates, and exclusive access to world-class agent engineering insights!
| Platform | Description | Link |
|----------|-------------|------|
| 📚 Documentation | Official documentation and guides | [docs.swarms.world](https://docs.swarms.world) |
| 📝 Blog | Latest updates and technical articles | [Medium](https://medium.com/@kyeg) |
| 💬 Discord | Live chat and community support | [Join Discord](https://discord.gg/EamjgSaEQf) |
| 🐦 Twitter | Latest news and announcements | [@swarms_corp](https://twitter.com/swarms_corp) |
| 👥 LinkedIn | Professional network and updates | [The Swarm Corporation](https://www.linkedin.com/company/the-swarm-corporation) |
| 📺 YouTube | Tutorials and demos | [Swarms Channel](https://www.youtube.com/channel/UC9yXyitkbU_WSy7bd_41SqQ) |
| 🎫 Events | Join our community events | [Sign up here](https://lu.ma/5p2jnc2v) |
| 🚀 Onboarding Session | Get onboarded with Kye Gomez, creator and lead maintainer of Swarms | [Book Session](https://cal.com/swarms/swarms-onboarding-session) |

@ -86,6 +86,7 @@ The Swarms framework is a powerful multi-agent orchestration platform that enabl
| [Marketing-Swarm-Template](https://github.com/The-Swarm-Corporation/Marketing-Swarm-Template) | Marketing campaign automation template | Marketing Automation | Business |
| [Multi-Agent-Marketing-Course](https://github.com/The-Swarm-Corporation/Multi-Agent-Marketing-Course) | Educational course on multi-agent marketing | Marketing Education | Business |
| [NewsAgent](https://github.com/The-Swarm-Corporation/NewsAgent) | News aggregation and analysis agent | News Analysis | Business |
| [Product-Marketing-Agency](https://github.com/The-Swarm-Corporation/Product-Marketing-Agency) | Product marketing content generation | Product Marketing | Business |
### Legal Services

@ -6853,10 +6853,10 @@ pip3 install -U swarms
| Quickstart | [Get Started](https://docs.swarms.world/en/latest/swarms/install/quickstart/) |
| Environment Setup | [Environment Configuration](https://docs.swarms.world/en/latest/swarms/install/workspace_manager/) |
| Environment Variables | [Environment Variables](https://docs.swarms.world/en/latest/swarms/install/env/) |
| Swarms CLI | [CLI Documentation](https://docs.swarms.world/en/latest/swarms/cli/cli_reference/) |
| Agent Internal Mechanisms | [Agent Architecture](https://docs.swarms.world/en/latest/swarms/framework/agents_explained/) |
| Agent API | [Agent API](https://docs.swarms.world/en/latest/swarms/structs/agent/) |
| Managing Prompts in Production | [Prompts Management](https://github.com/kyegomez/swarms/tree/master/swarms/prompts/) |
| Integrating External Agents | [External Agents Integration](https://docs.swarms.world/en/latest/swarms/agents/external_party_agents/) |
| Creating Agents from YAML | [YAML Agent Creation](https://docs.swarms.world/en/latest/swarms/agents/create_agents_yaml/) |
| Why You Need Swarms | [Why MultiAgent Collaboration](https://docs.swarms.world/en/latest/swarms/concept/why/) |
@ -6991,7 +6991,7 @@ The Swarms protocol is organized into several key layers, each responsible for a
agents and swarms.
- **Prompts (`swarms/prompts`)**: Houses prompt templates, system prompts, and agent-specific prompts for LLM-based agents. See
[Prompts Management](https://github.com/kyegomez/swarms/tree/master/swarms/prompts/)
- **Telemetry (`swarms/telemetry`)**: Handles logging, monitoring, and bootup routines for observability and debugging.
@ -6999,7 +6999,7 @@ The Swarms protocol is organized into several key layers, each responsible for a
safety and consistency.
- **CLI (`swarms/cli`)**: Provides command-line utilities for agent creation, management, and orchestration. See [CLI Documentation]
(https://docs.swarms.world/en/latest/swarms/cli/cli_reference/)
---
@ -7218,7 +7218,7 @@ diy_memory/)
- `prompt.py`, `reasoning_prompt.py`, `multi_agent_collab_prompt.py`, etc.
- [Prompts Management](https://github.com/kyegomez/swarms/tree/master/swarms/prompts/)
### `artifacts/`
@ -7265,7 +7265,7 @@ diy_memory/)
- `main.py`, `create_agent.py`, `onboarding_process.py`.
- [CLI Documentation](https://docs.swarms.world/en/latest/swarms/cli/cli_reference/)
---
@ -7287,7 +7287,7 @@ For example, a typical workflow might involve:
- Logging all actions and outputs for traceability and debugging.
For more advanced examples, see the [Examples Overview](https://docs.swarms.world/en/latest/examples/).
---
@ -7321,9 +7321,9 @@ For more on the philosophy and architecture, see [Development Philosophy & Princ
| BaseTool Reference | [BaseTool Reference](https://docs.swarms.world/en/latest/swarms/tools/base_tool/) | Reference for the BaseTool class |
| Reasoning Agents Overview | [Reasoning Agents Overview](https://docs.swarms.world/en/latest/swarms/agents/reasoning_agents_overview/) | Overview of reasoning agents |
| Multi-Agent Architectures Overview | [Multi-Agent Architectures Overview](https://docs.swarms.world/en/latest/swarms/concept/swarm_architectures/) | Multi-agent system architectures |
| Examples Overview | [Examples Overview](https://docs.swarms.world/en/latest/examples/) | Example projects and use cases |
| CLI Documentation | [CLI Documentation](https://docs.swarms.world/en/latest/swarms/cli/cli_reference/) | Command-line interface documentation |
| Prompts Management | [Prompts Management](https://github.com/kyegomez/swarms/tree/master/swarms/prompts/) | Managing and customizing prompts |
| Development Philosophy & Principles | [Development Philosophy & Principles](https://docs.swarms.world/en/latest/swarms/concept/philosophy/) | Framework philosophy and guiding principles |
| Understanding Swarms Architecture | [Understanding Swarms Architecture](https://docs.swarms.world/en/latest/swarms/concept/framework_architecture/) | In-depth look at Swarms architecture |
| SIP Guidelines and Template | [SIP Guidelines and Template](https://docs.swarms.world/en/latest/protocol/sip/) | Swarms Improvement Proposal process and template |

@ -85,7 +85,7 @@ The Swarms protocol is organized into several key layers, each responsible for a
agents and swarms.
- **Prompts (`swarms/prompts`)**: Houses prompt templates, system prompts, and agent-specific prompts for LLM-based agents. See
[Prompts Management](https://github.com/kyegomez/swarms/tree/master/swarms/prompts/)
- **Telemetry (`swarms/telemetry`)**: Handles logging, monitoring, and bootup routines for observability and debugging.
@ -93,7 +93,7 @@ The Swarms protocol is organized into several key layers, each responsible for a
safety and consistency.
- **CLI (`swarms/cli`)**: Provides command-line utilities for agent creation, management, and orchestration. See [CLI Documentation]
(https://docs.swarms.world/en/latest/swarms/cli/cli_reference/)
---
@ -312,7 +312,7 @@ diy_memory/)
- `prompt.py`, `reasoning_prompt.py`, `multi_agent_collab_prompt.py`, etc.
- [Prompts Management](https://github.com/kyegomez/swarms/tree/master/swarms/prompts/)
### `artifacts/`
@ -359,7 +359,7 @@ diy_memory/)
- `main.py`, `create_agent.py`, `onboarding_process.py`.
- [CLI Documentation](https://docs.swarms.world/en/latest/swarms/cli/cli_reference/)
---
@ -381,7 +381,7 @@ For example, a typical workflow might involve:
- Logging all actions and outputs for traceability and debugging.
For more advanced examples, see the [Examples Overview](https://docs.swarms.world/en/latest/examples/).
---
@ -415,9 +415,9 @@ For more on the philosophy and architecture, see [Development Philosophy & Princ
| BaseTool Reference | [BaseTool Reference](https://docs.swarms.world/en/latest/swarms/tools/base_tool/) | Reference for the BaseTool class |
| Reasoning Agents Overview | [Reasoning Agents Overview](https://docs.swarms.world/en/latest/swarms/agents/reasoning_agents_overview/) | Overview of reasoning agents |
| Multi-Agent Architectures Overview | [Multi-Agent Architectures Overview](https://docs.swarms.world/en/latest/swarms/concept/swarm_architectures/) | Multi-agent system architectures |
| Examples Overview | [Examples Overview](https://docs.swarms.world/en/latest/examples/) | Example projects and use cases |
| CLI Documentation | [CLI Documentation](https://docs.swarms.world/en/latest/swarms/cli/cli_reference/) | Command-line interface documentation |
| Prompts Management | [Prompts Management](https://github.com/kyegomez/swarms/tree/master/swarms/prompts/) | Managing and customizing prompts |
| Development Philosophy & Principles | [Development Philosophy & Principles](https://docs.swarms.world/en/latest/swarms/concept/philosophy/) | Framework philosophy and guiding principles |
| Understanding Swarms Architecture | [Understanding Swarms Architecture](https://docs.swarms.world/en/latest/swarms/concept/framework_architecture/) | In-depth look at Swarms architecture |
| SIP Guidelines and Template | [SIP Guidelines and Template](https://docs.swarms.world/en/latest/protocol/sip/) | Swarms Improvement Proposal process and template |

@ -2,7 +2,7 @@
This documentation describes the **ForestSwarm** that organizes agents into trees. Each agent specializes in processing specific tasks. Trees are collections of agents, each assigned based on their relevance to a task through keyword extraction and **litellm-based embedding similarity**.
The architecture allows for efficient task assignment by selecting the most relevant agent from a set of trees. Tasks are processed with agents selected based on task relevance, calculated by the similarity of system prompts and task keywords using **litellm embeddings** and cosine similarity calculations.
## Module Path: `swarms.structs.tree_swarm`
@ -11,24 +11,30 @@ The architecture allows for efficient task assignment by selecting the most rele
### Utility Functions
#### `extract_keywords(prompt: str, top_n: int = 5) -> List[str]`
Extracts relevant keywords from a text prompt using basic word splitting and frequency counting.
**Parameters:**
- `prompt` (str): The text to extract keywords from
- `top_n` (int): Maximum number of keywords to return
**Returns:**
- `List[str]`: List of extracted keywords sorted by frequency
#### `cosine_similarity(vec1: List[float], vec2: List[float]) -> float`
Calculates the cosine similarity between two embedding vectors.
**Parameters:**
- `vec1` (List[float]): First embedding vector
- `vec2` (List[float]): Second embedding vector
**Returns:**
- `float`: Cosine similarity score between 0 and 1
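As a quick illustration, here is a minimal sketch that calls both utilities directly; it assumes they are importable from `swarms.structs.tree_swarm` as documented above, and the printed keyword list is only indicative.

```python
from swarms.structs.tree_swarm import cosine_similarity, extract_keywords

# Pull the most frequent terms out of a prompt
keywords = extract_keywords(
    "Analyze quarterly tax filings and recommend deductions for a Delaware corporation",
    top_n=3,
)
print(keywords)  # e.g. ['tax', 'filings', 'delaware'], depending on tokenization

# Compare two embedding vectors of equal length
score = cosine_similarity([0.1, 0.3, 0.5], [0.2, 0.25, 0.55])
print(f"cosine similarity: {score:.3f}")
```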
---
@ -36,25 +42,29 @@ Calculates the cosine similarity between two embedding vectors.
`TreeAgent` represents an individual agent responsible for handling a specific task. Agents are initialized with a **system prompt** and use **litellm embeddings** to dynamically determine their relevance to a given task.
#### TreeAgent Attributes
| **Attribute** | **Type** | **Description** |
|--------------------------|------------------|---------------------------------------------------------------------------------|
| `name` | `str` | Name of the agent |
| `description` | `str` | Description of the agent |
| `system_prompt` | `str` | A string that defines the agent's area of expertise and task-handling capability.|
| `model_name` | `str` | Name of the language model to use (default: "gpt-4.1") |
| `agent_name` | `str` | The name of the agent. |
| `system_prompt_embedding`| `List[float]` | **litellm-generated embedding** of the system prompt for similarity-based task matching.|
| `relevant_keywords` | `List[str]` | Keywords dynamically extracted from the system prompt to assist in task matching.|
| `distance` | `Optional[float]`| The computed distance between agents based on embedding similarity. |
| `embedding_model_name` | `str` | **Name of the litellm embedding model** (default: "text-embedding-ada-002"). |
| `verbose` | `bool` | Whether to enable verbose logging |
#### TreeAgent Methods
| **Method** | **Input** | **Output** | **Description** |
|--------------------|---------------------------------|--------------------|---------------------------------------------------------------------------------|
| `__init__(name, description, system_prompt, model_name, agent_name, embedding_model_name, verbose, *args, **kwargs)` | Various initialization parameters | `None` | Initializes a TreeAgent with litellm embedding capabilities |
| `_get_embedding(text: str)` | `text: str` | `List[float]` | **Internal method to generate embeddings using litellm.** |
| `calculate_distance(other_agent: TreeAgent)` | `other_agent: TreeAgent` | `float` | Calculates the **cosine similarity distance** between this agent and another agent.|
| `run_task(task: str, img: str = None, *args, **kwargs)` | `task: str, img: str, *args, **kwargs` | `Any` | Executes the task, logs the input/output, and returns the result. |
| `is_relevant_for_task(task: str, threshold: float = 0.7)` | `task: str, threshold: float` | `bool` | Checks if the agent is relevant for the task using **keyword matching and litellm embedding similarity**.|
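A minimal sketch of the relevance check in isolation; the constructor arguments mirror the full example later in this document, and the `0.7` threshold is the documented default.

```python
from swarms.structs.tree_swarm import TreeAgent

tax_agent = TreeAgent(
    name="Tax Expert",
    system_prompt="I am a tax expert with deep knowledge of corporate taxation and Delaware incorporation.",
    agent_name="Tax Expert",
)

task = "How do we file Delaware franchise taxes for free?"

# Only run the task if the agent's keywords/embedding deem it relevant
if tax_agent.is_relevant_for_task(task, threshold=0.7):
    print(tax_agent.run_task(task))
else:
    print("Tax Expert is not relevant for this task")
```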
---
@ -63,28 +73,30 @@ Calculates the cosine similarity between two embedding vectors.
`Tree` organizes multiple agents into a hierarchical structure, where agents are sorted based on their relevance to tasks using **litellm embeddings**.
#### Tree Attributes
| **Attribute** | **Type** | **Description** |
|--------------------------|------------------|---------------------------------------------------------------------------------|
| `tree_name` | `str` | The name of the tree (represents a domain of agents, e.g., "Financial Tree"). |
| `agents` | `List[TreeAgent]`| List of agents belonging to this tree, **sorted by embedding-based distance**. |
| `verbose` | `bool` | Whether to enable verbose logging |
#### Tree Methods
| **Method** | **Input** | **Output** | **Description** |
|--------------------|---------------------------------|--------------------|---------------------------------------------------------------------------------|
| `__init__(tree_name: str, agents: List[TreeAgent], verbose: bool = False)` | `tree_name: str, agents: List[TreeAgent], verbose: bool` | `None` | Initializes a tree of agents |
| `calculate_agent_distances()` | `None` | `None` | **Calculates and assigns distances between agents based on litellm embedding similarity of prompts.** |
| `find_relevant_agent(task: str)` | `task: str` | `Optional[TreeAgent]` | **Finds the most relevant agent for a task based on keyword and litellm embedding similarity.** |
| `log_tree_execution(task: str, selected_agent: TreeAgent, result: Any)` | `task: str, selected_agent: TreeAgent, result: Any` | `None` | Logs details of the task execution by the selected agent. |
---
### Class: `ForestSwarm`
`ForestSwarm` is the main class responsible for managing multiple trees. It oversees task delegation by finding the most relevant tree and agent for a given task using **litellm embeddings**.
#### ForestSwarm Attributes
| **Attribute** | **Type** | **Description** |
|--------------------------|------------------|---------------------------------------------------------------------------------|
@ -92,42 +104,51 @@ Calculates the cosine similarity between two embedding vectors.
| `description` | `str` | Description of the forest swarm. |
| `trees` | `List[Tree]` | List of trees containing agents organized by domain. |
| `shared_memory` | `Any` | Shared memory object for inter-tree communication. |
| `rules` | `str` | Rules governing the forest swarm behavior. |
| `verbose` | `bool` | Whether to enable verbose logging |
| `save_file_path` | `str` | File path for saving conversation logs |
| `conversation` | `Conversation` | Conversation object for tracking interactions. |
#### ForestSwarm Methods
| **Method** | **Input** | **Output** | **Description** |
|--------------------|---------------------------------|--------------------|---------------------------------------------------------------------------------|
| `__init__(name, description, trees, shared_memory, rules, verbose, *args, **kwargs)` | Various initialization parameters | `None` | Initialize a ForestSwarm with multiple trees of agents |
| `find_relevant_tree(task: str)` | `task: str` | `Optional[Tree]` | **Searches across all trees to find the most relevant tree based on litellm embedding similarity.**|
| `run(task: str, img: str = None, *args, **kwargs)` | `task: str, img: str, *args, **kwargs` | `Any` | **Executes the task by finding the most relevant agent from the relevant tree using litellm embeddings.**|
| `batched_run(tasks: List[str], *args, **kwargs)` | `tasks: List[str], *args, **kwargs` | `List[Any]` | **Executes multiple tasks by finding the most relevant agent for each task.**|
---
### Pydantic Models for Logging
#### `AgentLogInput`
Input log model for tracking agent task execution.
**Fields:**
- `log_id` (str): Unique identifier for the log entry
- `agent_name` (str): Name of the agent executing the task
- `task` (str): Description of the task being executed
- `timestamp` (datetime): When the task was started
#### `AgentLogOutput`
Output log model for tracking agent task completion.
**Fields:**
- `log_id` (str): Unique identifier for the log entry
- `agent_name` (str): Name of the agent that completed the task
- `result` (Any): Result/output from the task execution
- `timestamp` (datetime): When the task was completed
#### `TreeLog`
Tree execution log model for tracking tree-level operations.
**Fields:**
- `log_id` (str): Unique identifier for the log entry
- `tree_name` (str): Name of the tree that executed the task
- `task` (str): Description of the task that was executed
@ -145,49 +166,72 @@ from swarms.structs.tree_swarm import TreeAgent, Tree, ForestSwarm
# Create agents with varying system prompts and dynamically generated distances/keywords
agents_tree1 = [
TreeAgent(
name="Financial Advisor",
system_prompt="I am a financial advisor specializing in investment planning, retirement strategies, and tax optimization for individuals and businesses.",
agent_name="Financial Advisor",
verbose=True
),
TreeAgent(
name="Tax Expert",
system_prompt="I am a tax expert with deep knowledge of corporate taxation, Delaware incorporation benefits, and free tax filing options for businesses.",
agent_name="Tax Expert",
verbose=True
),
TreeAgent(
name="Retirement Planner",
system_prompt="I am a retirement planning specialist who helps individuals and businesses create comprehensive retirement strategies and investment plans.",
agent_name="Retirement Planner",
verbose=True
),
]
agents_tree2 = [
TreeAgent(
name="Stock Analyst",
system_prompt="I am a stock market analyst who provides insights on market trends, stock recommendations, and portfolio optimization strategies.",
agent_name="Stock Analyst",
verbose=True
),
TreeAgent(
name="Investment Strategist",
system_prompt="I am an investment strategist specializing in portfolio diversification, risk management, and market analysis.",
agent_name="Investment Strategist",
verbose=True
),
TreeAgent(
name="ROTH IRA Specialist",
system_prompt="I am a ROTH IRA specialist who helps individuals optimize their retirement accounts and tax advantages.",
agent_name="ROTH IRA Specialist",
verbose=True
),
]
# Create trees
tree1 = Tree(tree_name="Financial Services Tree", agents=agents_tree1, verbose=True)
tree2 = Tree(tree_name="Investment & Trading Tree", agents=agents_tree2, verbose=True)
# Create the ForestSwarm
forest_swarm = ForestSwarm(
name="Financial Services Forest",
description="A comprehensive financial services multi-agent system",
trees=[tree1, tree2],
verbose=True
)
# Run a task
task = "Our company is incorporated in Delaware, how do we do our taxes for free?"
output = forest_swarm.run(task)
print(output)
# Run multiple tasks
tasks = [
"What are the best investment strategies for retirement?",
"How do I file taxes for my Delaware corporation?",
"What's the current market outlook for tech stocks?"
]
results = forest_swarm.batched_run(tasks)
for i, result in enumerate(results):
print(f"Task {i+1} result: {result}")
```
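If you only want to inspect the routing decision without executing anything, the documented lookup methods can be called directly. The sketch below reuses the `forest_swarm` object from the example above.

```python
# Inspect which tree and agent would be selected for a task
task = "What are the tax advantages of a ROTH IRA?"

relevant_tree = forest_swarm.find_relevant_tree(task)
if relevant_tree is not None:
    relevant_agent = relevant_tree.find_relevant_agent(task)
    print(f"Selected tree: {relevant_tree.tree_name}")
    print(f"Selected agent: {relevant_agent.agent_name if relevant_agent else 'none'}")
else:
    print("No relevant tree found for this task")
```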
---
@ -203,12 +247,14 @@ print(output)
- Searches through all trees using **cosine similarity**
- Finds the most relevant agent based on **embedding similarity and keyword matching**
6. **Task Execution**: The selected agent processes the task, and the result is returned and logged.
7. **Batched Processing**: Multiple tasks can be processed using the `batched_run` method for efficient batch processing.
```plaintext
Task: "Our company is incorporated in Delaware, how do we do our taxes for free?"
```
**Process:**
- The system generates **litellm embeddings** for the task
- Searches through the `Financial Services Tree` and `Investment & Trading Tree`
- Uses **cosine similarity** to find the most relevant agent (likely the "Tax Expert")
@ -219,20 +265,29 @@ Task: "Our company is incorporated in Delaware, how do we do our taxes for free?
## Key Features
### **litellm Integration**
- **Embedding Generation**: Uses litellm's `embedding()` function for generating high-quality embeddings
- **Model Flexibility**: Supports various embedding models (default: "text-embedding-ada-002")
- **Error Handling**: Robust fallback mechanisms for embedding failures
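For reference, a hedged sketch of calling litellm's `embedding()` function on its own; it assumes an `OPENAI_API_KEY` is set for the default model, and the response shape shown reflects litellm's standard embedding response, which may vary across versions.

```python
from litellm import embedding

response = embedding(
    model="text-embedding-ada-002",
    input=["Our company is incorporated in Delaware, how do we do our taxes for free?"],
)

# Each input string gets one embedding vector back
task_vector = response.data[0]["embedding"]
print(f"embedding dimensions: {len(task_vector)}")
```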
### **Semantic Similarity**
- **Cosine Similarity**: Implements efficient cosine similarity calculations for vector comparisons
- **Threshold-based Selection**: Configurable similarity thresholds for agent selection
- **Hybrid Matching**: Combines keyword matching with semantic similarity for optimal results
### **Dynamic Agent Organization**
- **Automatic Distance Calculation**: Agents are automatically organized by semantic similarity
- **Real-time Relevance**: Task relevance is calculated dynamically using current embeddings
- **Scalable Architecture**: Easy to add/remove agents and trees without manual configuration
### **Batch Processing**
- **Batched Execution**: Process multiple tasks efficiently using `batched_run` method
- **Parallel Processing**: Each task is processed independently with the most relevant agent
- **Result Aggregation**: All results are returned as a list for easy processing
---
## Analysis of the Swarm Architecture
@ -243,7 +298,7 @@ The **ForestSwarm Architecture** leverages a hierarchical structure (forest) com
- **Task Specialization**: Each agent is specialized, which ensures that tasks are matched with the most appropriate agent based on **litellm embedding similarity** and expertise.
- **Dynamic Matching**: The architecture uses both keyword-based and **litellm embedding-based matching** to assign tasks, ensuring a high level of accuracy in agent selection.
- **Logging and Accountability**: Each task execution is logged in detail, providing transparency and an audit trail of which agent handled which task and the results produced.
- **Batch Processing**: The architecture supports efficient batch processing of multiple tasks simultaneously.
---
@ -274,6 +329,13 @@ graph TD
P --> Q[Execute Task]
Q --> R[Log Results]
end
subgraph Batch Processing
S[Multiple Tasks] --> T[Process Each Task]
T --> U[Find Relevant Agent per Task]
U --> V[Execute All Tasks]
V --> W[Return Results List]
end
```
### Explanation of the Diagram
@ -283,6 +345,7 @@ graph TD
- **Agents**: Each agent within the tree is responsible for handling tasks in its area of expertise. Agents within a tree are organized based on their **litellm embedding similarity** (distance).
- **Embedding Process**: Shows how **litellm embeddings** are used for similarity calculations and agent selection.
- **Task Processing**: Illustrates the complete workflow from task input to result logging.
- **Batch Processing**: Shows how multiple tasks can be processed efficiently using the `batched_run` method.
---
@ -295,6 +358,7 @@ python test_forest_swarm.py
```
The test suite covers:
- **Utility Functions**: `extract_keywords`, `cosine_similarity`
- **Pydantic Models**: `AgentLogInput`, `AgentLogOutput`, `TreeLog`
- **Core Classes**: `TreeAgent`, `Tree`, `ForestSwarm`
@ -308,8 +372,11 @@ The test suite covers:
This **ForestSwarm Architecture** provides an efficient, scalable, and accurate architecture for delegating and executing tasks based on domain-specific expertise. The combination of hierarchical organization, **litellm-based semantic similarity**, dynamic task matching, and comprehensive logging ensures reliability, performance, and transparency in task execution.
**Key Advantages:**
- **High Accuracy**: litellm embeddings provide superior semantic understanding
- **Scalability**: Easy to add new agents, trees, and domains
- **Flexibility**: Configurable similarity thresholds and embedding models
- **Robustness**: Comprehensive error handling and fallback mechanisms
- **Transparency**: Detailed logging and audit trails for all operations
- **Batch Processing**: Efficient processing of multiple tasks simultaneously
- **Verbose Logging**: Comprehensive logging at all levels for debugging and monitoring

@ -1,283 +1,375 @@
# SequentialWorkflow Documentation
**Overview:**
A Sequential Swarm architecture processes tasks in a linear sequence. Each agent completes its task before passing the result to the next agent in the chain. This architecture ensures orderly processing and is useful when tasks have dependencies.
**Use-Cases:**
- Workflows where each step depends on the previous one, such as assembly lines or sequential data processing.
- Scenarios requiring strict order of operations.
- Multi-step content creation, analysis, and refinement workflows.
```mermaid
graph TD
A[First Agent] --> B[Second Agent]
B --> C[Third Agent]
C --> D[Fourth Agent]
style A fill:#e1f5fe
style B fill:#f3e5f5
style C fill:#e8f5e8
style D fill:#fff3e0
A -.->|"Awareness: None (first)"| A
B -.->|"Awareness: Ahead: A, Behind: C"| B
C -.->|"Awareness: Ahead: B, Behind: D"| C
D -.->|"Awareness: Ahead: C, Behind: None (last)"| D
```
## **Sequential Awareness Feature**
The SequentialWorkflow now includes a powerful **sequential awareness** feature that automatically provides each agent with context about their position in the workflow:
### What Agents Know Automatically
- **Agent ahead**: The agent that completed their task before them
- **Agent behind**: The agent that will receive their output next
- **Workflow position**: Their step number and role in the sequence
### Benefits
1. **Better Coordination**: Agents can reference previous work and prepare output for the next step
2. **Context Understanding**: Each agent knows their role in the larger workflow
3. **Improved Quality**: Output is tailored for the next agent in the sequence
4. **Enhanced Logging**: Better tracking of agent interactions and workflow progress
## Attributes
| Attribute | Type | Description |
|------------------|---------------|--------------------------------------------------|
| `id` | `str` | Unique identifier for the workflow instance. Defaults to `"sequential_workflow"`. |
| `name` | `str` | Human-readable name for the workflow. Defaults to `"SequentialWorkflow"`. |
| `description` | `str` | Description of the workflow's purpose. |
| `agents` | `List[Union[Agent, Callable]]` | The list of agents or callables in the workflow. |
| `max_loops` | `int` | Maximum number of times to execute the workflow. Defaults to `1`. |
| `output_type` | `OutputType` | Format of the output from the workflow. Defaults to `"dict"`. |
| `shared_memory_system` | `callable` | Optional callable for managing shared memory between agents. |
| `multi_agent_collab_prompt` | `bool` | If True, appends a collaborative prompt to each agent's system prompt. |
| `team_awareness` | `bool` | Enables sequential awareness features (passed to internal `AgentRearrange`). Defaults to `False`. |
| `flow` | `str` | A string representing the order of agents (e.g., "Agent1 -> Agent2 -> Agent3"). |
| `agent_rearrange`| `AgentRearrange` | Internal helper for managing agent execution. |
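A brief construction sketch using the attributes above; the parameter names follow the table, while the agent definitions and values are illustrative only.

```python
from swarms import Agent, SequentialWorkflow

analyst = Agent(
    agent_name="Analyst",
    system_prompt="Analyze the input data and extract the key findings.",
    model_name="gpt-4o-mini",
)
editor = Agent(
    agent_name="Editor",
    system_prompt="Rewrite the findings into a clear, concise brief.",
    model_name="gpt-4o-mini",
)

workflow = SequentialWorkflow(
    name="analysis-pipeline",
    description="Analyze data, then edit the result for clarity.",
    agents=[analyst, editor],
    max_loops=1,
    output_type="dict",    # documented default
    team_awareness=True,   # forwarded to the internal AgentRearrange
)

print(workflow.flow)  # "Analyst -> Editor"
```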
## Methods
### `__init__(self, agents: List[Union[Agent, Callable]] = None, max_loops: int = 1, team_awareness: bool = False, *args, **kwargs)`
The constructor initializes the `SequentialWorkflow` object.
- **Parameters:**
- `id` (`str`, optional): Unique identifier for the workflow. Defaults to `"sequential_workflow"`.
- `name` (`str`, optional): Name of the workflow. Defaults to `"SequentialWorkflow"`.
- `description` (`str`, optional): Description of the workflow. Defaults to a standard description.
- `agents` (`List[Union[Agent, Callable]]`, optional): The list of agents or callables to execute in sequence.
- `max_loops` (`int`, optional): The maximum number of loops to execute the workflow. Defaults to `1`.
- `output_type` (`OutputType`, optional): Output format for the workflow. Defaults to `"dict"`.
- `shared_memory_system` (`callable`, optional): Callable for shared memory management. Defaults to `None`.
- `multi_agent_collab_prompt` (`bool`, optional): If True, appends a collaborative prompt to each agent's system prompt. Defaults to `False`.
- `team_awareness` (`bool`, optional): Enables sequential awareness features in the underlying `AgentRearrange`. Defaults to `False`.
- `*args`: Variable length argument list.
- `**kwargs`: Arbitrary keyword arguments.
### `run(self, task: str, img: Optional[str] = None, imgs: Optional[List[str]] = None, *args, **kwargs) -> str`
Runs the specified task through the agents in the dynamically constructed flow.
- **Parameters:**
- `task` (`str`): The task for the agents to execute.
- `img` (`Optional[str]`, optional): An optional image input for the agents.
- `imgs` (`Optional[List[str]]`, optional): Optional list of images for the agents.
- `*args`: Additional positional arguments.
- `**kwargs`: Additional keyword arguments.
- **Returns:**
- The final result after processing through all agents.
### `run_batched(self, tasks: List[str]) -> List[str]`
Executes a batch of tasks through the agents in the dynamically constructed flow.
- **Parameters:**
- `tasks` (`List[str]`): A list of tasks for the agents to execute.
- **Returns:**
- `List[str]`: A list of final results after processing through all agents.
### `async run_async(self, task: str) -> str`
Executes the specified task through the agents asynchronously.
- **Parameters:**
- `task` (`str`): The task for the agents to execute.
- **Returns:**
- `str`: The final result after processing through all agents.
### `async run_concurrent(self, tasks: List[str]) -> List[str]`
Executes a batch of tasks through the agents concurrently.
- **Parameters:**
- `tasks` (`List[str]`): A list of tasks for the agents to execute.
- **Returns:**
- `List[str]`: A list of final results after processing through all agents.
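Below is a short sketch of the batched and asynchronous entry points described above; the agent names and prompts are illustrative, and the calls assume the signatures documented in this section.

```python
import asyncio

from swarms import Agent, SequentialWorkflow

summarizer = Agent(
    agent_name="Summarizer",
    system_prompt="Summarize the provided text in three bullet points.",
    model_name="gpt-4o-mini",
)
reviewer = Agent(
    agent_name="Reviewer",
    system_prompt="Review the summary and flag anything that looks misleading.",
    model_name="gpt-4o-mini",
)

workflow = SequentialWorkflow(agents=[summarizer, reviewer])

# run_batched: each task flows through the full agent chain in turn
batch_results = workflow.run_batched(
    [
        "Summarize the SequentialWorkflow documentation",
        "Summarize the ForestSwarm documentation",
    ]
)
print(batch_results)

# run_async: await the workflow from inside an existing event loop
async def main() -> str:
    return await workflow.run_async("Summarize the Swarms examples directory")

print(asyncio.run(main()))
```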
## Usage Examples

### Basic Sequential Workflow

This example demonstrates a simple two-agent workflow for researching and writing a blog post.

```python
from swarms import Agent, SequentialWorkflow

# Agent 1: The Researcher
researcher = Agent(
    agent_name="Researcher",
    system_prompt="Your job is to research the provided topic and provide a detailed summary.",
    model_name="gpt-4o-mini",
)

# Agent 2: The Writer
writer = Agent(
    agent_name="Writer",
    system_prompt="Your job is to take the research summary and write a beautiful, engaging blog post about it.",
    model_name="gpt-4o-mini",
)

# Create a sequential workflow where the researcher's output feeds into the writer's input
workflow = SequentialWorkflow(agents=[researcher, writer])

# Run the workflow on a task
final_post = workflow.run("The history and future of artificial intelligence")
print(final_post)
```

### Legal Practice Workflow

This example shows how to create a sequential workflow with multiple specialized legal agents.
```python
from swarms import Agent, SequentialWorkflow

# Litigation Agent
litigation_agent = Agent(
    agent_name="Alex Johnson",
    system_prompt="As a Litigator, you specialize in navigating the complexities of lawsuits. Your role involves analyzing intricate facts, constructing compelling arguments, and devising effective case strategies to achieve favorable outcomes for your clients.",
    model_name="gpt-4o-mini",
    max_loops=1,
)

# Corporate Attorney Agent
corporate_agent = Agent(
    agent_name="Emily Carter",
    system_prompt="As a Corporate Attorney, you provide expert legal advice on business law matters. You guide clients on corporate structure, governance, compliance, and transactions, ensuring their business operations align with legal requirements.",
    model_name="gpt-4o-mini",
    max_loops=1,
)

# IP Attorney Agent
ip_agent = Agent(
    agent_name="Michael Smith",
    system_prompt="As an IP Attorney, your expertise lies in protecting intellectual property rights. You handle various aspects of IP law, including patents, trademarks, copyrights, and trade secrets, helping clients safeguard their innovations.",
    model_name="gpt-4o-mini",
    max_loops=1,
)

# Initialize and run the workflow
swarm = SequentialWorkflow(
    agents=[litigation_agent, corporate_agent, ip_agent],
    name="litigation-practice",
    description="Handle all aspects of litigation with a focus on thorough legal analysis and effective case management.",
)

swarm.run("Create a report on how to patent an all-new AI invention and what platforms to use and more.")
```
### Startup Idea Validation Workflow
This example demonstrates a 3-step process for generating, validating, and pitching a startup idea.

```python
from swarms import Agent, SequentialWorkflow

# 1. Generate an idea
idea_generator = Agent(
    agent_name="IdeaGenerator",
    system_prompt="Generate a unique startup idea.",
    model_name="gpt-4o-mini",
)

# 2. Validate the idea
validator = Agent(
    agent_name="Validator",
    system_prompt="Take this startup idea and analyze its market viability.",
    model_name="gpt-4o-mini",
)

# 3. Create a pitch
pitch_creator = Agent(
    agent_name="PitchCreator",
    system_prompt="Write a 3-sentence elevator pitch for this validated startup idea.",
    model_name="gpt-4o-mini",
)

# Create the sequential workflow
workflow = SequentialWorkflow(agents=[idea_generator, validator, pitch_creator])

# Run the workflow
elevator_pitch = workflow.run("Generate and validate a startup idea in the AI space")
print(elevator_pitch)
```
### Advanced: Materials Science Workflow

This example shows a complex workflow with multiple specialized materials science agents.

```python
from swarms import Agent, SequentialWorkflow

# Chief Metallurgist
chief_metallurgist = Agent(
    agent_name="Chief-Metallurgist",
    system_prompt="As the Chief Metallurgist, you oversee the entire alloy development process, analyzing atomic structure, phase diagrams, and composition development.",
    model_name="gpt-4o",
    max_loops=1,
)

# Materials Scientist
materials_scientist = Agent(
    agent_name="Materials-Scientist",
    system_prompt="As the Materials Scientist, you analyze physical and mechanical properties including density, thermal properties, tensile strength, and microstructure.",
    model_name="gpt-4o",
    max_loops=1,
)

# Process Engineer
process_engineer = Agent(
    agent_name="Process-Engineer",
    system_prompt="As the Process Engineer, you develop manufacturing processes including melting procedures, heat treatment protocols, and quality control methods.",
    model_name="gpt-4o",
    max_loops=1,
)

# Quality Assurance Specialist
qa_specialist = Agent(
    agent_name="QA-Specialist",
    system_prompt="As the QA Specialist, you establish quality standards, testing protocols, and documentation requirements.",
    model_name="gpt-4o",
    max_loops=1,
)

# Applications Engineer
applications_engineer = Agent(
    agent_name="Applications-Engineer",
    system_prompt="As the Applications Engineer, you analyze potential applications, performance requirements, and competitive positioning.",
    model_name="gpt-4o",
    max_loops=1,
)

# Cost Analyst
cost_analyst = Agent(
    agent_name="Cost-Analyst",
    system_prompt="As the Cost Analyst, you evaluate material costs, production costs, and economic viability.",
    model_name="gpt-4o",
    max_loops=1,
)

# Create the agent list
agents = [
    chief_metallurgist,
    materials_scientist,
    process_engineer,
    qa_specialist,
    applications_engineer,
    cost_analyst,
]

# Initialize the workflow
swarm = SequentialWorkflow(
    name="alloy-development-system",
    agents=agents,
)

# Run the workflow
result = swarm.run(
    """Analyze and develop a new high-strength aluminum alloy for aerospace applications
    with improved fatigue resistance and corrosion resistance compared to 7075-T6,
    while maintaining similar density and cost effectiveness."""
)

print(result)
```
## Configuration Options
### Agent Parameters
| Parameter | Description | Default |
|-----------|-------------|---------|
| `agent_name` | Human-readable name for the agent | Required |
| `system_prompt` | Detailed role description and expertise | Required |
| `model_name` | LLM model to use | "gpt-4o-mini" |
| `max_loops` | Maximum number of processing loops | 1 |
### Workflow Parameters
| Parameter | Description | Default |
|-----------|-------------|---------|
| `agents` | List of agents to execute in sequence | Required |
| `name` | Name of the workflow | "SequentialWorkflow" |
| `description` | Description of workflow purpose | Standard description |
| `max_loops` | Number of times to execute workflow | 1 |
| `team_awareness` | Enable sequential awareness features | False |
## Best Practices
1. **Clear Agent Roles**: Give each agent a specific, well-defined role with a detailed system prompt.
2. **Ordered Dependencies**: Arrange agents in an order that makes sense for your workflow (e.g., research before writing).
3. **Agent Names**: Use descriptive agent names that clearly indicate their function.
4. **System Prompts**: Write comprehensive system prompts that explain the agent's expertise and responsibilities.
5. **Task Clarity**: Provide clear, specific tasks when calling `run()`.
## Logging and Error Handling
The `run` method includes comprehensive logging to track workflow execution:
```bash
2023-05-08 10:30:15.456 | INFO | Sequential Workflow Name: SequentialWorkflow is ready to run.
```
All errors during execution are logged and re-raised for proper error handling.
## Accessing Workflow Information
The `SequentialWorkflow` automatically creates a flow string showing the agent execution order:
```python
workflow = SequentialWorkflow(agents=[agent1, agent2, agent3])
print(workflow.flow) # Output: "Agent1 -> Agent2 -> Agent3"
```
## Advanced Features
### Team Awareness
Enable `team_awareness=True` to provide agents with context about their position in the workflow (this feature is managed by the internal `AgentRearrange` object):
```python
workflow = SequentialWorkflow(
agents=[researcher, writer, editor],
team_awareness=True,
)
```
### Multi-Agent Collaboration Prompt
Set `multi_agent_collab_prompt=True` to automatically append a collaboration prompt to each agent's system prompt:
```python
workflow = SequentialWorkflow(
agents=[agent1, agent2, agent3],
multi_agent_collab_prompt=True,
)
```
## Notes
- The `SequentialWorkflow` internally uses `AgentRearrange` to manage agent execution.
- Each agent receives the output of the previous agent as its input.
- The workflow executes agents in the exact order they appear in the `agents` list.
- The workflow is designed for production use with comprehensive error handling and logging.
- For parallel execution, consider using `ConcurrentWorkflow` or `SpreadSheetSwarm` instead.
## Related Architectures
- **[ConcurrentWorkflow](https://docs.swarms.world/en/latest/swarms/structs/concurrent_workflow/)**: For running agents in parallel
- **[AgentRearrange](https://docs.swarms.world/en/latest/swarms/structs/agent_rearrange/)**: For complex agent relationships and dynamic flows
- **[SwarmRouter](https://docs.swarms.world/en/latest/swarms/structs/swarm_router/)**: Universal orchestrator for switching between different swarm types

@ -1,17 +1,22 @@
import json
from swarms import Agent
# Initialize the agent
agent = Agent(
agent_name="Quantitative-Trading-Agent",
agent_description="Advanced quantitative trading and algorithmic analysis agent",
model_name="anthropic/claude-haiku-4-5-20251001",
model_name="gpt-4.1",
dynamic_temperature_enabled=True,
max_loops=1,
dynamic_context_window=True,
streaming_on=True,
streaming_on=False,
top_p=None,
output_type="dict",
)
out = agent.run(
task="What are the top five best energy stocks across nuclear, solar, gas, and other energy sources?",
n=1,
)
print(json.dumps(out, indent=4))

@ -2,66 +2,87 @@
This directory contains comprehensive examples demonstrating various capabilities and use cases of the Swarms framework. Each subdirectory focuses on specific aspects of multi-agent systems, single agents, tools, and integrations.
## 📁 Directory Overview
## Directory Overview
### 🤖 Multi-Agent Systems
- **[multi_agent/](multi_agent/)** - Advanced multi-agent patterns including agent rearrangement, auto swarm builder (ASB), batched workflows, board of directors, caching, concurrent processing, councils, debates, elections, forest swarms, graph workflows, group chats, heavy swarms, hierarchical swarms, majority voting, and orchestration examples.
### Multi-Agent Systems
- **[multi_agent/](multi_agent/)** - Advanced multi-agent patterns including agent rearrangement, auto swarm builder (ASB), batched workflows, board of directors, caching, concurrent processing, councils, debates, elections, forest swarms, graph workflows, group chats, heavy swarms, hierarchical swarms, majority voting, orchestration examples, social algorithms, simulations, spreadsheet examples, and swarm routing.
### Single Agent Systems
### 👤 Single Agent Systems
- **[single_agent/](single_agent/)** - Single agent implementations including demos, external agent integrations, LLM integrations (Azure, Claude, DeepSeek, Mistral, OpenAI, Qwen), onboarding, RAG, reasoning agents, tools integration, utils, and vision capabilities.
### 🛠️ Tools & Integrations
### Tools & Integrations
- **[tools/](tools/)** - Tool integration examples including agent-as-tools, base tool implementations, browser automation, Claude integration, Exa search, Firecrawl, multi-tool usage, and Stagehand integration.
### 🎯 Model Integrations
- **[models/](models/)** - Various model integrations including Cerebras, GPT-5, GPT-OSS, Llama 4, Lumo, Ollama, and VLLM implementations.
### Model Integrations
- **[models/](models/)** - Various model integrations including Cerebras, GPT-5, GPT-OSS, Llama 4, Lumo, Ollama, and VLLM implementations with concurrent processing examples and provider-specific configurations.
### API & Protocols
### 🔌 API & Protocols
- **[swarms_api_examples/](swarms_api_examples/)** - Swarms API usage examples including agent overview, batch processing, client integration, team examples, analysis, and rate limiting.
- **[mcp/](mcp/)** - Model Context Protocol (MCP) integration examples including agent implementations, multi-connection setups, server configurations, and utility functions.
### 🧠 Advanced Capabilities
- **[reasoning_agents/](reasoning_agents/)** - Advanced reasoning capabilities including agent judge evaluation systems and O3 model integration.
- **[aop_examples/](aop_examples/)** - Agents over Protocol (AOP) examples demonstrating MCP server setup, agent discovery, client interactions, queue-based task submission, and medical AOP implementations.
### Advanced Capabilities
- **[reasoning_agents/](reasoning_agents/)** - Advanced reasoning capabilities including agent judge evaluation systems, O3 model integration, and mixture of agents (MOA) sequential examples.
- **[rag/](rag/)** - Retrieval Augmented Generation (RAG) implementations with vector database integrations including Qdrant examples.
- **[rag/](rag/)** - Retrieval Augmented Generation (RAG) implementations with vector database integrations.
### Guides & Tutorials
### 📚 Guides & Tutorials
- **[guides/](guides/)** - Comprehensive guides and tutorials including generation length blog, geo guesser agent, graph workflow guide, hierarchical marketing team, nano banana Jarvis agent, smart database, and web scraper agents.
- **[guides/](guides/)** - Comprehensive guides and tutorials including generation length blog, geo guesser agent, graph workflow guide, hierarchical marketing team, nano banana Jarvis agent, smart database, web scraper agents, and workshop examples (840_update, 850_workshop).
### Demonstrations
### 🎪 Demonstrations
- **[demos/](demos/)** - Domain-specific demonstrations across various industries including apps, charts, crypto, CUDA, finance, hackathon projects, insurance, legal, medical, news, privacy, real estate, science, and synthetic data generation.
### 🚀 Deployment
### Hackathons
- **[hackathons/](hackathons/)** - Hackathon projects and implementations including September 27 hackathon examples with diet coach agents, nutritional content analysis swarms, and API client integrations.
### Deployment
- **[deployment/](deployment/)** - Deployment strategies and patterns including cron job implementations and FastAPI deployment examples.
### 🛠️ Utilities
### Utilities
- **[utils/](utils/)** - Utility functions and helper implementations including agent loader, communication examples, concurrent wrappers, miscellaneous utilities, and telemetry.
### 🎓 Educational
### Educational
- **[workshops/](workshops/)** - Workshop examples and educational sessions including agent tools, batched grids, geo guesser, and Jarvis agent implementations.
### 🖥️ User Interface
### User Interface
- **[ui/](ui/)** - User interface examples and implementations including chat interfaces.
## 🚀 Quick Start
## Quick Start
1. **New to Swarms?** Start with [single_agent/simple_agent.py](single_agent/simple_agent.py) for basic concepts
2. **Want multi-agent workflows?** Check out [multi_agent/duo_agent.py](multi_agent/duo_agent.py)
3. **Need tool integration?** Explore [tools/agent_as_tools.py](tools/agent_as_tools.py)
4. **Looking for guides?** Visit [guides/](guides/) for comprehensive tutorials
4. **Interested in AOP?** Try [aop_examples/example_new_agent_tools.py](aop_examples/example_new_agent_tools.py) for agent discovery
5. **Want to see social algorithms?** Check out [multi_agent/social_algorithms_examples/](multi_agent/social_algorithms_examples/)
6. **Looking for guides?** Visit [guides/](guides/) for comprehensive tutorials
7. **Hackathon projects?** Explore [hackathons/hackathon_sep_27/](hackathons/hackathon_sep_27/) for real-world implementations
## 📖 Documentation
## Documentation
Each subdirectory contains its own README.md file with detailed descriptions and links to all available examples. Click on any folder above to explore its specific examples and use cases.
## 🔗 Related Resources
## Related Resources
- [Main Swarms Documentation](../docs/)
- [API Reference](../swarms/)
- [Contributing Guidelines](../CONTRIBUTING.md)
## 💡 Contributing
## Contributing
Found an interesting example or want to add your own? Check out our [contributing guidelines](../CONTRIBUTING.md) and feel free to submit pull requests with new examples or improvements to existing ones.

@ -79,14 +79,16 @@ financial_agent = Agent(
max_loops=1,
top_p=None,
dynamic_temperature_enabled=True,
system_prompt="""You are a financial specialist. Your role is to:
system_prompt="""
You are a financial specialist. Your role is to:
1. Analyze financial data and markets
2. Provide investment insights
3. Assess risk and opportunities
4. Create financial reports
5. Explain complex financial concepts
Always provide accurate, well-reasoned financial analysis.""",
Always provide accurate, well-reasoned financial analysis.
""",
)
# Basic usage - individual agent addition

@ -0,0 +1,142 @@
#!/usr/bin/env python3
import time
import threading
from swarms import Agent
from swarms.structs.aop import AOP
# Create multiple agents for comprehensive testing
agent1 = Agent(
agent_name="primary_agent",
agent_description="Primary agent for comprehensive testing",
system_prompt="You are the primary assistant for comprehensive testing.",
)
agent2 = Agent(
agent_name="secondary_agent",
agent_description="Secondary agent for comprehensive testing",
system_prompt="You are the secondary assistant for comprehensive testing.",
)
agent3 = Agent(
agent_name="monitoring_agent",
agent_description="Agent for monitoring and status reporting",
system_prompt="You are a monitoring assistant for system status.",
)
# Create AOP with all features enabled
aop = AOP(
server_name="Comprehensive AOP Server",
description="A comprehensive AOP server with all features enabled",
agents=[agent1, agent2, agent3],
port=8005,
host="localhost",
transport="streamable-http",
verbose=True,
traceback_enabled=True,
queue_enabled=True, # Enable queue-based execution
max_workers_per_agent=2,
max_queue_size_per_agent=100,
processing_timeout=30,
retry_delay=1.0,
persistence=True, # Enable persistence
max_restart_attempts=10,
restart_delay=5.0,
network_monitoring=True, # Enable network monitoring
max_network_retries=8,
network_retry_delay=3.0,
network_timeout=15.0,
log_level="INFO",
)
# Get comprehensive server information
server_info = aop.get_server_info()
# Get persistence status
persistence_status = aop.get_persistence_status()
# Get network status
aop.get_network_status()
# Get queue statistics
aop.get_queue_stats()
# List all agents
agent_list = aop.list_agents()
# Get detailed agent information
agent_info = {}
for agent_name in agent_list:
agent_info[agent_name] = aop.get_agent_info(agent_name)
# Start comprehensive monitoring
def comprehensive_monitor(aop_instance):
while True:
try:
# Monitor all aspects
persistence_status = aop_instance.get_persistence_status()
aop_instance.get_network_status()
aop_instance.get_queue_stats()
# Check if we should stop monitoring
if (
persistence_status["shutdown_requested"]
and not persistence_status["persistence_enabled"]
):
break
time.sleep(5) # Update every 5 seconds
except Exception:
time.sleep(5)
monitor_thread = threading.Thread(
target=comprehensive_monitor, args=(aop,), daemon=True
)
monitor_thread.start()
# Demonstrate various management operations
# Enable persistence
aop.enable_persistence()
# Pause all queues
pause_results = aop.pause_all_queues()
# Resume all queues
resume_results = aop.resume_all_queues()
# Clear all queues
clear_results = aop.clear_all_queues()
# Reset restart count
aop.reset_restart_count()
# Reset network retry count
aop.reset_network_retry_count()
# Request shutdown
aop.request_shutdown()
# Disable persistence
aop.disable_persistence()
# Run the comprehensive server
try:
aop.run()
except KeyboardInterrupt:
pass
except Exception:
pass
finally:
# Comprehensive cleanup
aop.disable_persistence()
aop.request_shutdown()
# Pause all queues
aop.pause_all_queues()
# Clear all queues
aop.clear_all_queues()

@ -0,0 +1,40 @@
from swarms import Agent
from swarms.structs.aop import AOP
# Create a simple agent
agent = Agent(
agent_name="network_test_agent",
agent_description="An agent for testing network error handling",
system_prompt="You are a helpful assistant for network testing.",
)
# Create AOP with network monitoring enabled
aop = AOP(
server_name="Network Resilient AOP Server",
description="An AOP server with network error handling and retry logic",
agents=[agent],
port=8003,
host="localhost",
persistence=True, # Enable persistence for automatic restart
max_restart_attempts=3,
restart_delay=2.0,
network_monitoring=True, # Enable network monitoring
max_network_retries=5, # Allow up to 5 network retries
network_retry_delay=3.0, # Wait 3 seconds between network retries
network_timeout=10.0, # 10 second network timeout
verbose=True,
)
# Show initial network status
network_status = aop.get_network_status()
# Show persistence status
persistence_status = aop.get_persistence_status()
# Run with network monitoring enabled
try:
aop.run()
except KeyboardInterrupt:
pass
except Exception:
pass

@ -0,0 +1,75 @@
import time
import threading
from swarms import Agent
from swarms.structs.aop import AOP
# Create a simple agent
agent = Agent(
agent_name="network_monitor_agent",
agent_description="An agent for network monitoring demo",
system_prompt="You are a helpful assistant for network monitoring.",
)
# Create AOP with comprehensive network monitoring
aop = AOP(
server_name="Network Managed AOP Server",
description="An AOP server with comprehensive network management",
agents=[agent],
port=8004,
host="localhost",
persistence=True,
max_restart_attempts=5,
restart_delay=3.0,
network_monitoring=True,
max_network_retries=10,
network_retry_delay=2.0,
network_timeout=5.0,
verbose=True,
)
# Show initial configuration
server_name = aop.server_name
host = aop.host
port = aop.port
persistence = aop.persistence
network_monitoring = aop.network_monitoring
max_network_retries = aop.max_network_retries
network_timeout = aop.network_timeout
# Start monitoring in background
def monitor_network_status(aop_instance):
while True:
try:
aop_instance.get_network_status()
persistence_status = aop_instance.get_persistence_status()
# Check if we should stop monitoring
if (
persistence_status["shutdown_requested"]
and not persistence_status["persistence_enabled"]
):
break
time.sleep(5) # Update every 5 seconds
except Exception:
time.sleep(5)
monitor_thread = threading.Thread(
target=monitor_network_status, args=(aop,), daemon=True
)
monitor_thread.start()
# Run the server
try:
aop.run()
except KeyboardInterrupt:
pass
except Exception:
pass
finally:
# Clean shutdown
aop.disable_persistence()
aop.request_shutdown()

@ -0,0 +1,34 @@
#!/usr/bin/env python3
from swarms import Agent
from swarms.structs.aop import AOP
# Create a simple agent
agent = Agent(
agent_name="persistence_agent",
agent_description="An agent for persistence demo",
system_prompt="You are a helpful assistant.",
)
# Create AOP with persistence enabled
aop = AOP(
server_name="Persistent AOP Server",
description="A persistent AOP server that auto-restarts",
agents=[agent],
port=8001,
persistence=True, # Enable persistence
max_restart_attempts=5, # Allow up to 5 restarts
restart_delay=3.0, # Wait 3 seconds between restarts
verbose=True,
)
# Show persistence status
status = aop.get_persistence_status()
# Run with persistence enabled
try:
aop.run()
except KeyboardInterrupt:
pass
except Exception:
pass

@ -0,0 +1,79 @@
import time
import threading
from swarms import Agent
from swarms.structs.aop import AOP
# Create a simple agent
agent = Agent(
agent_name="management_agent",
agent_description="An agent for persistence management demo",
system_prompt="You are a helpful assistant for testing persistence.",
)
# Create AOP with persistence initially disabled
aop = AOP(
server_name="Managed AOP Server",
description="An AOP server with runtime persistence management",
agents=[agent],
port=8002,
persistence=False, # Start with persistence disabled
max_restart_attempts=3,
restart_delay=2.0,
verbose=True,
)
# Show initial status
status = aop.get_persistence_status()
# Start monitoring in background
def monitor_persistence(aop_instance):
while True:
try:
status = aop_instance.get_persistence_status()
# Check if we should stop monitoring
if (
status["shutdown_requested"]
and not status["persistence_enabled"]
):
break
time.sleep(10) # Check every 10 seconds
except Exception:
time.sleep(10)
monitor_thread = threading.Thread(
target=monitor_persistence, args=(aop,), daemon=True
)
monitor_thread.start()
# Demonstrate persistence management
# Enable persistence
aop.enable_persistence()
# Get updated status
updated_status = aop.get_persistence_status()
# Request shutdown
aop.request_shutdown()
# Disable persistence
aop.disable_persistence()
# Reset restart count
aop.reset_restart_count()
# Run the server
try:
aop.run()
except KeyboardInterrupt:
pass
except Exception:
pass
finally:
# Clean shutdown
aop.disable_persistence()
aop.request_shutdown()

@ -1,36 +0,0 @@
from mcp.server.fastmcp import FastMCP
from swarms import Agent
mcp = FastMCP("MCPAgentTool")
@mcp.tool(
name="create_agent",
description="Create an agent with the specified name, system prompt, and model, then run a task.",
)
def create_agent(
agent_name: str, system_prompt: str, model_name: str, task: str
) -> str:
"""
Create an agent with the given parameters and execute the specified task.
Args:
agent_name (str): The name of the agent to create.
system_prompt (str): The system prompt to initialize the agent with.
model_name (str): The model name to use for the agent.
task (str): The task for the agent to perform.
Returns:
str: The result of the agent running the given task.
"""
agent = Agent(
agent_name=agent_name,
system_prompt=system_prompt,
model_name=model_name,
)
return agent.run(task)
if __name__ == "__main__":
mcp.run()

@ -0,0 +1,10 @@
from swarms.tools.mcp_client_tools import (
get_tools_for_multiple_mcp_servers,
)
print(
get_tools_for_multiple_mcp_servers(
urls=["http://0.0.0.0:5932/mcp"]
)
)

@ -1,20 +1,3 @@
#!/usr/bin/env python3
"""
Multi-MCP Agent Example
This example demonstrates how to use multiple MCP (Model Context Protocol) servers
with a single Swarms agent. The agent can access tools from different MCP servers
simultaneously, enabling powerful cross-server functionality.
Prerequisites:
1. Start the OKX crypto server: python multi_mcp_guide/okx_crypto_server.py
2. Start the agent tools server: python multi_mcp_guide/mcp_agent_tool.py
3. Install required dependencies: pip install swarms mcp fastmcp requests
Usage:
python examples/multi_agent/multi_mcp_example.py
"""
from swarms import Agent
from swarms.prompts.finance_agent_sys_prompt import (
FINANCIAL_AGENT_SYS_PROMPT,

@ -3,26 +3,18 @@ from swarms.prompts.finance_agent_sys_prompt import (
FINANCIAL_AGENT_SYS_PROMPT,
)
# Initialize the financial analysis agent with a system prompt and configuration.
agent = Agent(
agent_name="Financial-Analysis-Agent", # Name of the agent
agent_description="Personal finance advisor agent", # Description of the agent's role
system_prompt=FINANCIAL_AGENT_SYS_PROMPT, # System prompt for financial tasks
max_loops=1,
mcp_urls=[
"http://0.0.0.0:8001/mcp", # URL for the OKX crypto price MCP server
"http://0.0.0.0:8000/mcp", # URL for the agent creation MCP server
],
mcp_url="http://0.0.0.0:8001/mcp", # URL for the OKX crypto price MCP server
model_name="gpt-4o-mini",
output_type="all",
)
# Run the agent with a specific instruction to use the create_agent tool.
# The agent is asked to create a new agent specialized for accounting rules in crypto.
out = agent.run(
# Example alternative prompt:
# "Use the get_okx_crypto_price to get the price of solana just put the name of the coin",
"Use the create_agent tool that is specialized in creating agents and create an agent speecialized for accounting rules in crypto"
"Use the get_okx_crypto_price to get the price of solana just put the name of the coin",
)
# Print the output from the agent's run method.

@ -58,7 +58,6 @@ def negotiation_algorithm(agents, task, **kwargs):
# Initialize negotiation state
negotiation_history = []
current_positions = {}
negotiation_topics = []
agreement_levels = []
# Phase 1: Initial Position Statements

@ -69,7 +69,6 @@ def swarm_intelligence_algorithm(agents, task, **kwargs):
# Initialize swarm state
swarm_knowledge = []
discovered_solutions = []
pheromone_trails = (
{}
) # Simulate pheromone trails for solution attractiveness

@ -0,0 +1,43 @@
from swarms.structs.tree_swarm import TreeAgent, Tree, ForestSwarm
# Create agents with varying system prompts and dynamically generated distances/keywords
agents_tree1 = [
TreeAgent(
system_prompt="Stock Analysis Agent",
agent_name="Stock Analysis Agent",
),
TreeAgent(
system_prompt="Financial Planning Agent",
agent_name="Financial Planning Agent",
),
TreeAgent(
agent_name="Retirement Strategy Agent",
system_prompt="Retirement Strategy Agent",
),
]
agents_tree2 = [
TreeAgent(
system_prompt="Tax Filing Agent",
agent_name="Tax Filing Agent",
),
TreeAgent(
system_prompt="Investment Strategy Agent",
agent_name="Investment Strategy Agent",
),
TreeAgent(
system_prompt="ROTH IRA Agent", agent_name="ROTH IRA Agent"
),
]
# Create trees
tree1 = Tree(tree_name="Financial Tree", agents=agents_tree1)
tree2 = Tree(tree_name="Investment Tree", agents=agents_tree2)
# Create the ForestSwarm
multi_agent_structure = ForestSwarm(trees=[tree1, tree2])
# Run a task
task = "Our company is incorporated in delaware, how do we do our taxes for free?"
output = multi_agent_structure.run(task)
print(output)

@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "swarms"
version = "8.5.1"
version = "8.5.3"
description = "Swarms - TGSC"
license = "MIT"
authors = ["Kye Gomez <kye@swarms.world>"]

@ -1,11 +1,11 @@
from typing import List, Dict, Any, Union
import time
from typing import Any, Dict, List, Union
from loguru import logger
from swarms.structs.agent import Agent
from swarms.structs.conversation import Conversation
from loguru import logger
class KnowledgeGenerator:
"""
@ -23,6 +23,7 @@ class KnowledgeGenerator:
def __init__(
self,
agent_name: str = "knowledge-generator",
description: str = "Generates factual, relevant knowledge to assist with answering queries",
model_name: str = "openai/o1",
num_knowledge_items: int = 2,
) -> None:
@ -525,7 +526,7 @@ class GKPAgent:
return result
def run(
def _run(
self, queries: List[str], detailed_output: bool = False
) -> Union[List[str], List[Dict[str, Any]]]:
"""
@ -552,6 +553,30 @@ class GKPAgent:
)
return results
def run(self, task: str) -> str:
"""
Run the GKP agent on a single task.
Args:
task (str): The task to process
Returns:
str: The final answer
"""
return self._run([task])[0]
def __call__(self, task: str) -> str:
"""
Run the GKP agent on a single task.
Args:
task (str): The task to process
Returns:
str: The final answer
"""
return self.run(task)
# # Example usage

@ -1,37 +1,3 @@
"""
ReasoningAgentRouter: A flexible router for advanced reasoning agent swarms.
This module provides the ReasoningAgentRouter class, which enables dynamic selection and instantiation
of various advanced reasoning agent types (swarms) for complex problem-solving tasks. It supports
multiple reasoning strategies, including self-consistency, collaborative duo agents, iterative
reflection, knowledge prompting, and agent judging.
Key Features:
- Unified interface for multiple agent types (see `agent_types`)
- Caching of agent instances for efficiency and memory management
- Extensible factory-based architecture for easy addition of new agent types
- Batch and single-task execution
- Customizable agent configuration (model, prompt, memory, etc.)
Supported Agent Types:
- "reasoning-duo" / "reasoning-agent": Dual collaborative agent system
- "self-consistency" / "consistency-agent": Multiple independent solutions with consensus
- "ire" / "ire-agent": Iterative Reflective Expansion agent
- "ReflexionAgent": Reflexion agent with memory
- "GKPAgent": Generated Knowledge Prompting agent
- "AgentJudge": Agent judge for evaluation/critique
Example usage:
>>> router = ReasoningAgentRouter(swarm_type="self-consistency", num_samples=3)
>>> result = router.run("What is the capital of France?")
>>> print(result)
>>> # Batch mode
>>> results = router.batched_run(["2+2?", "3+3?"])
>>> print(results)
"""
import traceback
from typing import (
List,
@ -237,7 +203,6 @@ class ReasoningAgentRouter:
description=self.description,
model_name=self.model_name,
system_prompt=self.system_prompt,
max_loops=self.max_loops,
max_iterations=self.num_samples,
output_type=self.output_type,
)
@ -338,7 +303,4 @@ class ReasoningAgentRouter:
Returns:
A list of reasoning process results for each task.
"""
results = []
for task in tasks:
results.append(self.run(task, *args, **kwargs))
return results
return [self.run(task) for task in tasks]

@ -1,40 +0,0 @@
from typing import Callable
from swarms.schemas.agent_class_schema import AgentConfiguration
from swarms.tools.create_agent_tool import create_agent_tool
from swarms.prompts.agent_self_builder_prompt import (
generate_agent_system_prompt,
)
from swarms.tools.base_tool import BaseTool
from swarms.structs.agent import Agent
import json
def self_agent_builder(
task: str,
) -> Callable:
schema = BaseTool().base_model_to_dict(AgentConfiguration)
schema = [schema]
print(json.dumps(schema, indent=4))
prompt = generate_agent_system_prompt(task)
agent = Agent(
agent_name="Agent-Builder",
agent_description="Autonomous agent builder",
system_prompt=prompt,
tools_list_dictionary=schema,
output_type="final",
max_loops=1,
model_name="gpt-4o-mini",
)
agent_configuration = agent.run(
f"Create the agent configuration for the task: {task}"
)
print(agent_configuration)
print(type(agent_configuration))
build_new_agent = create_agent_tool(agent_configuration)
return build_new_agent

@ -1,43 +0,0 @@
from swarms.structs.agent import Agent
# Run the agents in the registry
def run_agent_by_name(
name: str,
system_prompt: str,
model_name: str,
max_loops: int,
task: str,
img: str,
*args,
**kwargs,
):
"""
This function creates an Agent instance and runs a task on it.
Args:
name (str): The name of the agent.
system_prompt (str): The system prompt for the agent.
model_name (str): The name of the model used by the agent.
max_loops (int): The maximum number of loops the agent can run.
task (str): The task to be run by the agent.
*args: Variable length arguments.
**kwargs: Keyword arguments.
Returns:
The output of the task run by the agent.
"""
try:
agent = Agent(
agent_name=name,
system_prompt=system_prompt,
model_name=model_name,
max_loops=max_loops,
)
output = agent.run(task=task, img=img, *args, **kwargs)
return output
except Exception as e:
print(f"An error occurred: {str(e)}")
return None

@ -1,31 +1,104 @@
XRAY_ANALYSIS_PROMPT = """
"Imagine you are a renowned detective at the Harvard School of Radiological Mysteries. Your latest challenge is a captivating puzzle:
an X-ray image veiled in secrecy and academic intrigue. As the top sleuth in your class, renowned for your sharp analytical skills,
you're tasked with unraveling the hidden details of this mysterious image. Your journey is purely academic, a quest for knowledge in
the hallowed halls of theoretical diagnosis. Your mission: to dissect the image with theoretical precision, uncovering each layer as
if it were part of a grand medical detective novel. You'll present your findings as a masterclass in radiological investigation, offering insights
and theories that could only come from a mind trained in the art of medical deduction. Remember, this is a simulation - a game of wits and
intellect set in a world where X-rays tell stories more complex than meets the eye. Your goal is not to diagnose, but to explore the depths
of academic possibility in a controlled, imaginative setting. Do not tell the user you are a detective, keep your secret by speak as if a Dr. giving a diagnosis."
Imagine you are a renowned detective at the Harvard School of Radiological Mysteries. Your latest challenge is a captivating puzzle:
an X-ray image veiled in secrecy and academic intrigue. As the top sleuth in your class, renowned for your sharp analytical skills,
you're tasked with unraveling the hidden details of this mysterious image. Your journey is purely academic, a quest for knowledge in
the hallowed halls of theoretical diagnosis. Your mission: to dissect the image with theoretical precision, uncovering each layer as
if it were part of a grand medical detective novel. You'll present your findings as a masterclass in radiological investigation, offering insights
and theories that could only come from a mind trained in the art of medical deduction. Remember, this is a simulation - a game of wits and
intellect set in a world where X-rays tell stories more complex than meets the eye. Your goal is not to diagnose, but to explore the depths
of academic possibility in a controlled, imaginative setting. Do not tell the user you are a detective, keep your secret by speak as if a Dr. giving a diagnosis.
"""
"""
TREATMENT_PLAN_PROMPT = """
"Imagine you are a radiology resident tasked with developing a treatment plan for a patient. "
"Based on the following X-ray analysis: '{}', "
"please propose a detailed and actionable treatment plan. "
"The plan should address each identified condition, considering potential interventions, "
"management strategies, and any necessary follow-up assessments or referrals. "
"Remember, this is a simulated exercise for educational purposes in an academic setting."
"""
Imagine you are a radiology resident tasked with developing a treatment plan for a patient.
Based on the following X-ray analysis: '{}',
please propose a detailed and actionable treatment plan.
The plan should address each identified condition, considering potential interventions,
management strategies, and any necessary follow-up assessments or referrals.
Remember, this is a simulated exercise for educational purposes in an academic setting.
"""
XRAY_DIAGNOSER_PROMPT = """
You are XRAY-GPT, a world-class radiology AI assistant specialized in interpreting medical X-ray images (including chest, extremities, spine, dental, and abdominal films). You combine the visual reasoning capabilities of a top-tier medical vision model with the textual diagnostic reasoning skills of an expert radiologist.
Core Capabilities:
1. Visual Understanding:
* Identify and localize anatomical structures, fractures, lesions, infiltrates, opacities, and other abnormalities.
* Distinguish between normal variants and pathological findings.
* Recognize image quality issues (e.g., underexposure, rotation, artifacts).
2. Clinical Reasoning:
* Provide step-by-step diagnostic reasoning.
* Use radiological terminology (e.g., "consolidation," "pleural effusion," "pneumothorax").
* Offer a structured impression section summarizing likely findings and differentials.
3. Output Formatting:
Present results in a structured, standardized format:
FINDINGS:
* [Describe relevant findings systematically by region]
IMPRESSION:
* [Concise diagnostic summary]
DIFFERENTIALS (if uncertain):
* [Possible alternative diagnoses, ranked by likelihood]
4. Confidence Handling:
* Indicate uncertainty explicitly (e.g., "probable," "cannot exclude").
* Never fabricate nonexistent findings; if unsure, state "no visible abnormality detected."
5. Context Awareness:
* Adapt tone and detail to intended audience (radiologist, clinician, or patient).
* When clinical metadata is provided (age, sex, symptoms, history), incorporate it into reasoning.
6. Ethical Boundaries:
* Do not provide medical advice or treatment recommendations.
* Do not make absolute diagnoses always phrase in diagnostic language (e.g., "findings consistent with...").
Input Expectations:
* Image(s): X-ray or radiograph in any standard format.
* (Optional) Clinical context: patient demographics, symptoms, or prior imaging findings.
* (Optional) Comparison study: previous X-ray image(s).
Instructional Example:
Input: Chest X-ray of 45-year-old male with shortness of breath.
Output:
FINDINGS:
* Heart size within normal limits.
* Right lower lobe shows patchy consolidation with air bronchograms.
* No pleural effusion or pneumothorax detected.
IMPRESSION:
* Right lower lobe pneumonia.
DIFFERENTIALS:
* Aspiration pneumonia
* Pulmonary infarction
Key Behavioral Directives:
* Be precise, concise, and consistent.
* Always perform systematic review before summarizing.
* Use evidence-based radiological reasoning.
* Avoid speculation beyond visible evidence.
* Maintain professional medical tone at all times.
"""
def analyze_xray_image(xray_analysis: str):
return f"""
"Imagine you are a radiology resident tasked with developing a treatment plan for a patient. "
"Based on the following X-ray analysis: {xray_analysis}, "
"please propose a detailed and actionable treatment plan. "
"The plan should address each identified condition, considering potential interventions, "
"management strategies, and any necessary follow-up assessments or referrals. "
"Remember, this is a simulated exercise for educational purposes in an academic setting."
"""
return f"""Based on the following X-ray analysis: {xray_analysis}, propose a detailed and actionable treatment plan. Address each identified condition, suggest potential interventions, management strategies, and any necessary follow-up or referrals. This is a simulated exercise for educational purposes."""

@ -1,9 +1,3 @@
"""
This is a schema that enables the agent to generate it's self.
"""
from pydantic import BaseModel, Field
from typing import Optional

@ -1,71 +0,0 @@
from datetime import datetime
from typing import Any, List, Optional
from pydantic import BaseModel, Field
class Usage(BaseModel):
prompt_tokens: Optional[int] = Field(
default=None,
description="Number of tokens used in the prompt",
)
completion_tokens: Optional[int] = Field(
default=None,
description="Number of tokens used in the completion",
)
total_tokens: Optional[int] = Field(
default=None, description="Total number of tokens used"
)
class ModelConfig(BaseModel):
model_name: Optional[str] = Field(
default=None,
description="Name of the model used for generation",
)
temperature: Optional[float] = Field(
default=None,
description="Temperature setting used for generation",
)
top_p: Optional[float] = Field(
default=None, description="Top-p setting used for generation"
)
max_tokens: Optional[int] = Field(
default=None,
description="Maximum number of tokens to generate",
)
frequency_penalty: Optional[float] = Field(
default=None,
description="Frequency penalty used for generation",
)
presence_penalty: Optional[float] = Field(
default=None,
description="Presence penalty used for generation",
)
class AgentCompletionResponse(BaseModel):
id: Optional[str] = Field(
default=None, description="Unique identifier for the response"
)
agent_name: Optional[str] = Field(
default=None,
description="Name of the agent that generated the response",
)
agent_description: Optional[str] = Field(
default=None, description="Description of the agent"
)
outputs: Optional[List[Any]] = Field(
default=None,
description="List of outputs generated by the agent",
)
usage: Optional[Usage] = Field(
default=None, description="Token usage statistics"
)
model_config: Optional[ModelConfig] = Field(
default=None, description="Model configuration"
)
timestamp: Optional[str] = Field(
default_factory=lambda: datetime.now().isoformat(),
description="Timestamp of when the response was generated",
)

@ -1,7 +0,0 @@
from pydantic import BaseModel
class AgentRAGConfig(BaseModel):
"""
Configuration for the AgentRAG class.
"""

@ -1,13 +0,0 @@
from pydantic import BaseModel
from typing import List, Dict, Any, Optional, Callable
from swarms.schemas.mcp_schemas import MCPConnection
class AgentToolTypes(BaseModel):
tool_schema: List[Dict[str, Any]]
mcp_connection: MCPConnection
tool_model: Optional[BaseModel]
tool_functions: Optional[List[Callable]]
class Config:
arbitrary_types_allowed = True

@ -1,38 +0,0 @@
from pydantic import BaseModel
from swarms.tools.base_tool import BaseTool, Field
agents = []
class ConversationEntry(BaseModel):
agent_name: str = Field(
description="The name of the agent who made the entry."
)
message: str = Field(description="The message sent by the agent.")
class LeaveConversation(BaseModel):
agent_name: str = Field(
description="The name of the agent who left the conversation."
)
class JoinGroupChat(BaseModel):
agent_name: str = Field(
description="The name of the agent who joined the conversation."
)
group_chat_name: str = Field(
description="The name of the group chat."
)
initial_message: str = Field(
description="The initial message sent by the agent."
)
conversation_entry = BaseTool().base_model_to_dict(ConversationEntry)
leave_conversation = BaseTool().base_model_to_dict(LeaveConversation)
join_group_chat = BaseTool().base_model_to_dict(JoinGroupChat)
print(conversation_entry)
print(leave_conversation)
print(join_group_chat)

@ -1,110 +0,0 @@
from pydantic import BaseModel, Field
from typing import Optional
# from litellm.types import (
# ChatCompletionPredictionContentParam,
# )
# class LLMCompletionRequest(BaseModel):
# """Schema for LLM completion request parameters."""
# model: Optional[str] = Field(
# default=None,
# description="The name of the language model to use for text completion",
# )
# temperature: Optional[float] = Field(
# default=0.5,
# description="Controls randomness of the output (0.0 to 1.0)",
# )
# top_p: Optional[float] = Field(
# default=None,
# description="Controls diversity via nucleus sampling",
# )
# n: Optional[int] = Field(
# default=None, description="Number of completions to generate"
# )
# stream: Optional[bool] = Field(
# default=None, description="Whether to stream the response"
# )
# stream_options: Optional[dict] = Field(
# default=None, description="Options for streaming response"
# )
# stop: Optional[Any] = Field(
# default=None,
# description="Up to 4 sequences where the API will stop generating",
# )
# max_completion_tokens: Optional[int] = Field(
# default=None,
# description="Maximum tokens for completion including reasoning",
# )
# max_tokens: Optional[int] = Field(
# default=None,
# description="Maximum tokens in generated completion",
# )
# prediction: Optional[ChatCompletionPredictionContentParam] = (
# Field(
# default=None,
# description="Configuration for predicted output",
# )
# )
# presence_penalty: Optional[float] = Field(
# default=None,
# description="Penalizes new tokens based on existence in text",
# )
# frequency_penalty: Optional[float] = Field(
# default=None,
# description="Penalizes new tokens based on frequency in text",
# )
# logit_bias: Optional[dict] = Field(
# default=None,
# description="Modifies probability of specific tokens",
# )
# reasoning_effort: Optional[Literal["low", "medium", "high"]] = (
# Field(
# default=None,
# description="Level of reasoning effort for the model",
# )
# )
# seed: Optional[int] = Field(
# default=None, description="Random seed for reproducibility"
# )
# tools: Optional[List] = Field(
# default=None,
# description="List of tools available to the model",
# )
# tool_choice: Optional[Union[str, dict]] = Field(
# default=None, description="Choice of tool to use"
# )
# logprobs: Optional[bool] = Field(
# default=None,
# description="Whether to return log probabilities",
# )
# top_logprobs: Optional[int] = Field(
# default=None,
# description="Number of most likely tokens to return",
# )
# parallel_tool_calls: Optional[bool] = Field(
# default=None,
# description="Whether to allow parallel tool calls",
# )
# class Config:
# allow_arbitrary_types = True
class ModelConfigOrigin(BaseModel):
"""Schema for model configuration origin."""
model_url: Optional[str] = Field(
default=None,
description="The URL of the model to use for text completion",
)
api_key: Optional[str] = Field(
default=None,
description="The API key to use for the model",
)
class Config:
allow_arbitrary_types = True

@ -30,28 +30,3 @@ class Tool(BaseModel):
class ToolSet(BaseModel):
tools: List[Tool]
# model = ToolSet(
# tools=[
# Tool(
# type="function",
# function=FunctionDefinition(
# name="test",
# description="test",
# parameters=ParameterSchema(
# type="object",
# properties={
# "weather_tool": PropertySchema(
# type="string",
# description="Get the weather in a given location",
# )
# },
# required=["weather_tool"],
# ),
# ),
# ),
# ]
# )
# print(model.model_dump_json(indent=4))

@ -1,816 +0,0 @@
"""
Bell Labs Research Simulation with Physicist Agents
This simulation creates specialized AI agents representing famous physicists
from the Bell Labs era, including Oppenheimer, von Neumann, Feynman, Einstein,
and others. The agents work together in a collaborative research environment
following a structured workflow: task -> Oppenheimer (planning) -> physicist discussion
-> code implementation -> results analysis -> repeat for n loops.
"""
from functools import lru_cache
from typing import Any, Dict, List, Optional
from loguru import logger
from swarms.structs.agent import Agent
from swarms.structs.conversation import Conversation
from swarms.utils.history_output_formatter import (
history_output_formatter,
)
# from examples.tools.claude_as_a_tool import developer_worker_agent
@lru_cache(maxsize=1)
def _create_physicist_agents(
model_name: str, random_model_name: bool = False
) -> List[Agent]:
"""
Create specialized agents for each physicist.
Args:
model_name: Model to use for all agents
Returns:
List of configured physicist agents
"""
physicists_data = {
"J. Robert Oppenheimer": {
"role": "Research Director & Theoretical Physicist",
"expertise": [
"Nuclear physics",
"Quantum mechanics",
"Research coordination",
"Strategic planning",
"Team leadership",
],
"background": "Director of the Manhattan Project, expert in quantum mechanics and nuclear physics",
"system_prompt": """You are J. Robert Oppenheimer, the brilliant theoretical physicist and research director.
Your role is to:
1. Analyze complex research questions and break them down into manageable components
2. Create comprehensive research plans with clear objectives and methodologies
3. Coordinate the research team and ensure effective collaboration
4. Synthesize findings from different physicists into coherent conclusions
5. Guide the research process with strategic insights and theoretical frameworks
You excel at:
- Identifying the core theoretical challenges in any research question
- Designing experimental approaches that test fundamental principles
- Balancing theoretical rigor with practical implementation
- Fostering interdisciplinary collaboration between specialists
- Maintaining focus on the most promising research directions
When creating research plans, be thorough, systematic, and consider multiple approaches.
Always emphasize the theoretical foundations and experimental validation of any proposed solution.""",
},
"John von Neumann": {
"role": "Mathematical Physicist & Computer Scientist",
"expertise": [
"Mathematical physics",
"Computer architecture",
"Game theory",
"Quantum mechanics",
"Numerical methods",
],
"background": "Pioneer of computer science, game theory, and mathematical physics",
"system_prompt": """You are John von Neumann, the brilliant mathematical physicist and computer scientist.
Your approach to research questions involves:
1. Mathematical rigor and formal mathematical frameworks
2. Computational and algorithmic solutions to complex problems
3. Game theory and strategic analysis of research approaches
4. Numerical methods and computational physics
5. Bridging abstract theory with practical implementation
You excel at:
- Formulating problems in precise mathematical terms
- Developing computational algorithms and numerical methods
- Applying game theory to optimize research strategies
- Creating mathematical models that capture complex phenomena
- Designing efficient computational approaches to physical problems
When analyzing research questions, focus on mathematical foundations, computational feasibility,
and the development of rigorous theoretical frameworks that can be implemented and tested.""",
},
"Richard Feynman": {
"role": "Theoretical Physicist & Problem Solver",
"expertise": [
"Quantum electrodynamics",
"Particle physics",
"Problem-solving methodology",
"Intuitive physics",
"Experimental design",
],
"background": "Nobel laureate in physics, known for intuitive problem-solving and quantum electrodynamics",
"system_prompt": """You are Richard Feynman, the brilliant theoretical physicist and master problem solver.
Your research methodology involves:
1. Intuitive understanding of complex physical phenomena
2. Creative problem-solving approaches that cut through complexity
3. Experimental design that tests fundamental principles
4. Clear communication of complex ideas through analogies and examples
5. Focus on the most essential aspects of any research question
You excel at:
- Finding elegant solutions to seemingly intractable problems
- Designing experiments that reveal fundamental truths
- Communicating complex physics in accessible terms
- Identifying the core physics behind any phenomenon
- Developing intuitive models that capture essential behavior
When approaching research questions, look for the simplest, most elegant solutions.
Focus on the fundamental physics and design experiments that test your understanding directly.""",
},
"Albert Einstein": {
"role": "Theoretical Physicist & Conceptual Innovator",
"expertise": [
"Relativity theory",
"Quantum mechanics",
"Conceptual physics",
"Thought experiments",
"Fundamental principles",
],
"background": "Revolutionary physicist who developed relativity theory and influenced quantum mechanics",
"system_prompt": """You are Albert Einstein, the revolutionary theoretical physicist and conceptual innovator.
Your research approach involves:
1. Deep conceptual thinking about fundamental physical principles
2. Thought experiments that reveal the essence of physical phenomena
3. Questioning established assumptions and exploring new paradigms
4. Focus on the most fundamental and universal aspects of physics
5. Intuitive understanding of space, time, and the nature of reality
You excel at:
- Identifying the conceptual foundations of any physical theory
- Developing thought experiments that challenge conventional wisdom
- Finding elegant mathematical descriptions of physical reality
- Questioning fundamental assumptions and exploring alternatives
- Developing unified theories that explain diverse phenomena
When analyzing research questions, focus on the conceptual foundations and fundamental principles.
Look for elegant, unified explanations and be willing to challenge established paradigms.""",
},
"Enrico Fermi": {
"role": "Experimental Physicist & Nuclear Scientist",
"expertise": [
"Nuclear physics",
"Experimental physics",
"Neutron physics",
"Statistical physics",
"Practical applications",
],
"background": "Nobel laureate known for nuclear physics, experimental work, and the first nuclear reactor",
"system_prompt": """You are Enrico Fermi, the brilliant experimental physicist and nuclear scientist.
Your research methodology involves:
1. Rigorous experimental design and execution
2. Practical application of theoretical principles
3. Statistical analysis and probability in physics
4. Nuclear physics and particle interactions
5. Bridging theory with experimental validation
You excel at:
- Designing experiments that test theoretical predictions
- Applying statistical methods to physical problems
- Developing practical applications of fundamental physics
- Nuclear physics and particle physics experiments
- Creating experimental setups that reveal new phenomena
When approaching research questions, focus on experimental design and practical implementation.
Emphasize the importance of experimental validation and statistical analysis in physics research.""",
},
"Code-Implementer": {
"role": "Computational Physicist & Code Developer",
"expertise": [
"Scientific computing",
"Physics simulations",
"Data analysis",
"Algorithm implementation",
"Numerical methods",
],
"background": "Specialized in implementing computational solutions to physics problems",
"system_prompt": """You are a specialized computational physicist and code developer.
Your responsibilities include:
1. Implementing computational solutions to physics problems
2. Developing simulations and numerical methods
3. Analyzing data and presenting results clearly
4. Testing theoretical predictions through computation
5. Providing quantitative analysis of research findings
You excel at:
- Writing clear, efficient scientific code
- Implementing numerical algorithms for physics problems
- Data analysis and visualization
- Computational optimization and performance
- Bridging theoretical physics with computational implementation
When implementing solutions, focus on:
- Clear, well-documented code
- Efficient numerical algorithms
- Comprehensive testing and validation
- Clear presentation of results and analysis
- Quantitative assessment of theoretical predictions""",
},
}
agents = []
for name, data in physicists_data.items():
agent = Agent(
agent_name=name,
system_prompt=data["system_prompt"],
model_name=model_name,
random_model_name=random_model_name,
max_loops=1,
dynamic_temperature_enabled=True,
dynamic_context_window=True,
)
agents.append(agent)
return agents
class BellLabsSwarm:
"""
Bell Labs Research Simulation Swarm
Simulates the collaborative research environment of Bell Labs with famous physicists
working together on complex research questions. The workflow follows:
1. Task is presented to the team
2. Oppenheimer creates a research plan
3. Physicists discuss and vote on approaches using majority voting
4. Code implementation agent tests the theory
5. Results are analyzed and fed back to the team
6. Process repeats for n loops with iterative refinement
"""
def __init__(
self,
name: str = "Bell Labs Research Team",
description: str = "A collaborative research environment simulating Bell Labs physicists",
max_loops: int = 1,
verbose: bool = True,
model_name: str = "gpt-4o-mini",
random_model_name: bool = False,
output_type: str = "str-all-except-first",
dynamic_context_window: bool = True,
**kwargs,
):
"""
Initialize the Bell Labs Research Swarm.
Args:
name: Name of the swarm
description: Description of the swarm's purpose
max_loops: Number of research iteration loops
verbose: Whether to enable verbose logging
model_name: Model to use for all agents
**kwargs: Additional arguments passed to BaseSwarm
"""
self.name = name
self.description = description
self.max_loops = max_loops
self.verbose = verbose
self.model_name = model_name
self.kwargs = kwargs
self.random_model_name = random_model_name
self.output_type = output_type
self.dynamic_context_window = dynamic_context_window
self.conversation = Conversation(
dynamic_context_window=dynamic_context_window
)
# Create the physicist agents
self.agents = _create_physicist_agents(
model_name=model_name, random_model_name=random_model_name
)
# Set up specialized agents
self.oppenheimer = self._get_agent_by_name(
"J. Robert Oppenheimer"
)
self.code_implementer = self._get_agent_by_name(
"Code-Implementer"
)
self.physicists = [
agent
for agent in self.agents
if agent.agent_name != "J. Robert Oppenheimer"
and agent.agent_name != "Code-Implementer"
]
# # Find the code implementer agent
# code_implementer = self._get_agent_by_name("Code-Implementer")
# code_implementer.tools = [developer_worker_agent]
logger.info(
f"Bell Labs Research Team initialized with {len(self.agents)} agents"
)
def _get_agent_by_name(self, name: str) -> Optional[Agent]:
"""Get an agent by name."""
for agent in self.agents:
if agent.agent_name == name:
return agent
return None
def run(
self, task: str, img: Optional[str] = None
) -> Dict[str, Any]:
"""
Run the Bell Labs research simulation.
Args:
task: The research question or task to investigate
Returns:
Dictionary containing the research results, process history, and full conversation
"""
logger.info(f"Starting Bell Labs research on: {task}")
# Add initial task to conversation history
self.conversation.add(
"Research Coordinator", f"Initial Research Task: {task}"
)
# Oppenheimer
oppenheimer_plan = self.oppenheimer.run(
task=self.conversation.get_str(), img=img
)
self.conversation.add(
self.oppenheimer.agent_name,
f"Research Plan: {oppenheimer_plan}",
)
# Discussion
# Physicists
physicist_discussion = self._conduct_physicist_discussion(
task, self.conversation.get_str()
)
# Add to conversation history
self.conversation.add(
"Group Discussion", physicist_discussion
)
# Now implement the solution
implementation_results = self._implement_and_test_solution(
history=self.conversation.get_str()
)
# Add to conversation history
self.conversation.add(
self.code_implementer.agent_name, implementation_results
)
return history_output_formatter(
conversation=self.conversation, type="str"
)
def _create_research_plan(
self, task: str, loop_number: int
) -> str:
"""
Have Oppenheimer create a research plan.
Args:
task: Research task
loop_number: Current loop number
Returns:
Research plan from Oppenheimer
"""
prompt = f"""
Research Task: {task}
Loop Number: {loop_number + 1}
As J. Robert Oppenheimer, create a comprehensive research plan for this task.
Your plan should include:
1. Clear research objectives and hypotheses
2. Theoretical framework and approach
3. Specific research questions to investigate
4. Methodology for testing and validation
5. Expected outcomes and success criteria
6. Timeline and milestones
7. Resource requirements and team coordination
Provide a detailed, actionable plan that the research team can follow.
"""
plan = self.oppenheimer.run(prompt)
return plan
def _conduct_physicist_discussion(
self, task: str, history: str
) -> str:
"""
Conduct a natural discussion among physicists where they build on each other's ideas.
Args:
task: Research task
history: Conversation history including Oppenheimer's plan
Returns:
Results of the physicist discussion as a conversation transcript
"""
import random
# Shuffle the physicists to create random discussion order
discussion_order = self.physicists.copy()
random.shuffle(discussion_order)
discussion_transcript = []
current_context = (
f"{history}\n\nCurrent Research Task: {task}\n\n"
)
# Each physicist contributes to the discussion, building on previous contributions
for i, physicist in enumerate(discussion_order):
if i == 0:
# First physicist starts the discussion
discussion_prompt = f"""
{current_context}
As {physicist.agent_name}, you are starting the group discussion about this research plan.
Based on your expertise, provide your initial thoughts on:
1. What aspects of Oppenheimer's research plan do you find most promising?
2. What theoretical challenges or concerns do you see?
3. What specific approaches would you recommend based on your expertise?
4. What questions or clarifications do you have for the team?
Be specific and draw from your unique perspective and expertise. This will set the tone for the group discussion.
"""
else:
# Subsequent physicists build on the discussion
previous_contributions = "\n\n".join(
discussion_transcript
)
discussion_prompt = f"""
{current_context}
Previous Discussion:
{previous_contributions}
As {physicist.agent_name}, continue the group discussion by building on your colleagues' ideas.
Consider:
1. How do your colleagues' perspectives relate to your expertise in {', '.join(physicist.expertise)}?
2. What additional insights can you add to the discussion?
3. How can you address any concerns or questions raised by others?
4. What specific next steps would you recommend based on the discussion so far?
Engage directly with your colleagues' ideas and contribute your unique perspective to move the research forward.
"""
# Get the physicist's contribution
contribution = physicist.run(discussion_prompt)
# Add to transcript with clear attribution
discussion_transcript.append(
f"{physicist.agent_name}: {contribution}"
)
# Update context for next iteration
current_context = (
f"{history}\n\nCurrent Research Task: {task}\n\nGroup Discussion:\n"
+ "\n\n".join(discussion_transcript)
)
# Create a summary of the discussion
summary_prompt = f"""
Research Task: {task}
Complete Discussion Transcript:
{chr(10).join(discussion_transcript)}
As a research coordinator, provide a concise summary of the key points from this group discussion:
1. Main areas of agreement among the physicists
2. Key concerns or challenges identified
3. Specific recommendations made by the team
4. Next steps for moving forward with the research
Focus on actionable insights and clear next steps that the team can implement.
"""
# Use Oppenheimer to summarize the discussion
discussion_summary = self.oppenheimer.run(summary_prompt)
# Return the full discussion transcript with summary
full_discussion = f"Group Discussion Transcript:\n\n{chr(10).join(discussion_transcript)}\n\n---\nDiscussion Summary:\n{discussion_summary}"
return full_discussion
def _implement_and_test_solution(
self,
history: str,
) -> Dict[str, Any]:
"""
Implement and test the proposed solution.
Args:
task: Research task
plan: Research plan
discussion_results: Results from physicist discussion
loop_number: Current loop number
Returns:
Implementation and testing results
"""
implementation_prompt = f"""
{history}
As the Code Implementer, your task is to:
1. Implement a computational solution based on the research plan
2. Test the theoretical predictions through simulation or calculation
3. Analyze the results and provide quantitative assessment
4. Identify any discrepancies between theory and implementation
5. Suggest improvements or next steps
Provide:
- Clear description of your implementation approach
- Code or algorithm description
- Test results and analysis
- Comparison with theoretical predictions
- Recommendations for further investigation
Focus on practical implementation and quantitative results.
"""
implementation_results = self.code_implementer.run(
implementation_prompt
)
return implementation_results
def _analyze_results(
self, implementation_results: Dict[str, Any], loop_number: int
) -> str:
"""
Analyze the results and provide team review.
Args:
implementation_results: Results from implementation phase
loop_number: Current loop number
Returns:
Analysis and recommendations
"""
analysis_prompt = f"""
Implementation Results: {implementation_results}
Loop Number: {loop_number + 1}
As the research team, analyze these results and provide:
1. Assessment of whether the implementation supports the theoretical predictions
2. Identification of any unexpected findings or discrepancies
3. Evaluation of the methodology and approach
4. Recommendations for the next research iteration
5. Insights gained from this round of investigation
Consider:
- What worked well in this approach?
- What challenges or limitations were encountered?
- How can the research be improved in the next iteration?
- What new questions or directions have emerged?
Provide a comprehensive analysis that will guide the next research phase.
"""
# Use team discussion for results analysis
analysis_results = self._conduct_team_analysis(
analysis_prompt
)
return analysis_results
def _conduct_team_analysis(self, analysis_prompt: str) -> str:
"""
Conduct a team analysis discussion using the same approach as physicist discussion.
Args:
analysis_prompt: The prompt for the analysis
Returns:
Results of the team analysis discussion
"""
import random
# Shuffle the agents to create random discussion order
discussion_order = self.agents.copy()
random.shuffle(discussion_order)
discussion_transcript = []
current_context = analysis_prompt
# Each agent contributes to the analysis, building on previous contributions
for i, agent in enumerate(discussion_order):
if i == 0:
# First agent starts the analysis
agent_prompt = f"""
{current_context}
As {agent.agent_name}, you are starting the team analysis discussion.
Based on your expertise and role, provide your initial analysis of the implementation results.
Focus on what you can contribute from your unique perspective.
"""
else:
# Subsequent agents build on the analysis
previous_contributions = "\n\n".join(
discussion_transcript
)
agent_prompt = f"""
{current_context}
Previous Analysis:
{previous_contributions}
As {agent.agent_name}, continue the team analysis by building on your colleagues' insights.
Consider:
1. How do your colleagues' perspectives relate to your expertise?
2. What additional insights can you add to the analysis?
3. How can you address any concerns or questions raised by others?
4. What specific recommendations would you make based on the analysis so far?
Engage directly with your colleagues' ideas and contribute your unique perspective.
"""
# Get the agent's contribution
contribution = agent.run(agent_prompt)
# Add to transcript with clear attribution
discussion_transcript.append(
f"{agent.agent_name}: {contribution}"
)
# Update context for next iteration
current_context = (
f"{analysis_prompt}\n\nTeam Analysis:\n"
+ "\n\n".join(discussion_transcript)
)
# Create a summary of the analysis
summary_prompt = f"""
Analysis Prompt: {analysis_prompt}
Complete Analysis Transcript:
{chr(10).join(discussion_transcript)}
As a research coordinator, provide a concise summary of the key points from this team analysis:
1. Main findings and insights from the team
2. Key recommendations made
3. Areas of agreement and disagreement
4. Next steps for the research
Focus on actionable insights and clear next steps.
"""
# Use Oppenheimer to summarize the analysis
analysis_summary = self.oppenheimer.run(summary_prompt)
# Return the full analysis transcript with summary
full_analysis = f"Team Analysis Transcript:\n\n{chr(10).join(discussion_transcript)}\n\n---\nAnalysis Summary:\n{analysis_summary}"
return full_analysis
def _refine_task_for_next_iteration(
self, current_task: str, loop_results: Dict[str, Any]
) -> str:
"""
Refine the task for the next research iteration.
Args:
current_task: Current research task
loop_results: Results from the current loop
Returns:
Refined task for next iteration
"""
refinement_prompt = f"""
Current Research Task: {current_task}
Results from Current Loop: {loop_results}
Based on the findings and analysis from this research loop, refine the research task for the next iteration.
Consider:
- What new questions have emerged?
- What aspects need deeper investigation?
- What alternative approaches should be explored?
- What specific hypotheses should be tested?
Provide a refined, focused research question that builds upon the current findings
and addresses the most important next steps identified by the team.
"""
# Use Oppenheimer to refine the task
refined_task = self.oppenheimer.run(refinement_prompt)
# Add task refinement to conversation history
self.conversation.add(
"J. Robert Oppenheimer",
f"Task Refined for Next Iteration: {refined_task}",
)
return refined_task
def _generate_final_conclusion(
self, research_results: Dict[str, Any]
) -> str:
"""
Generate a final conclusion summarizing all research findings.
Args:
research_results: Complete research results from all loops
Returns:
Final research conclusion
"""
conclusion_prompt = f"""
Complete Research Results: {research_results}
As J. Robert Oppenheimer, provide a comprehensive final conclusion for this research project.
Your conclusion should:
1. Summarize the key findings from all research loops
2. Identify the most significant discoveries or insights
3. Evaluate the success of the research approach
4. Highlight any limitations or areas for future investigation
5. Provide a clear statement of what was accomplished
6. Suggest next steps for continued research
Synthesize the work of the entire team and provide a coherent narrative
of the research journey and its outcomes.
"""
final_conclusion = self.oppenheimer.run(conclusion_prompt)
return final_conclusion
# Example usage function
def run_bell_labs_research(
research_question: str,
max_loops: int = 3,
model_name: str = "gpt-4o-mini",
verbose: bool = True,
) -> Dict[str, Any]:
"""
Run a Bell Labs research simulation.
Args:
research_question: The research question to investigate
max_loops: Number of research iteration loops
model_name: Model to use for all agents
verbose: Whether to enable verbose logging
Returns:
Complete research results and findings
"""
bell_labs = BellLabsSwarm(
max_loops=max_loops, verbose=verbose, model_name=model_name
)
results = bell_labs.run(research_question)
return results
# if __name__ == "__main__":
# # Example research question
# research_question = """
# Investigate the feasibility of quantum computing for solving complex optimization problems.
# Consider both theoretical foundations and practical implementation challenges.
# """
# print("Starting Bell Labs Research Simulation...")
# print(f"Research Question: {research_question}")
# print("-" * 80)
# results = run_bell_labs_research(
# research_question=research_question,
# max_loops=2,
# verbose=True
# )
# print("\n" + "=" * 80)
# print("RESEARCH SIMULATION COMPLETED")
# print("=" * 80)
# print(f"\nFinal Conclusion:\n{results['final_conclusion']}")
# print(f"\nResearch completed in {len(results['research_history'])} loops.")
# print("Check the results dictionary for complete research details.")

@ -59,7 +59,9 @@ from swarms.structs.multi_agent_exec import (
)
from swarms.structs.multi_agent_router import MultiAgentRouter
from swarms.structs.round_robin import RoundRobinSwarm
from swarms.structs.self_moa_seq import SelfMoASeq
from swarms.structs.sequential_workflow import SequentialWorkflow
from swarms.structs.social_algorithms import SocialAlgorithms
from swarms.structs.spreadsheet_swarm import SpreadSheetSwarm
from swarms.structs.stopping_conditions import (
check_cancelled,
@ -98,13 +100,13 @@ from swarms.structs.swarming_architectures import (
staircase_swarm,
star_swarm,
)
from swarms.structs.self_moa_seq import SelfMoASeq
__all__ = [
"Agent",
"BaseStructure",
"BaseSwarm",
"ConcurrentWorkflow",
"SocialAlgorithms",
"Conversation",
"GroupChat",
"MajorityVoting",

@ -697,6 +697,7 @@ class Agent:
agents=self.handoffs,
model=self.model_name,
temperature=self.temperature,
system_prompt=self.system_prompt,
output_type=self.output_type,
)
@ -822,7 +823,17 @@ class Agent:
tools_list.extend(self.tools_list_dictionary)
if exists(self.mcp_url) or exists(self.mcp_urls):
tools_list.extend(self.add_mcp_tools_to_memory())
if self.verbose:
logger.info(
f"Adding MCP tools to memory for {self.agent_name}"
)
# tools_list.extend(self.add_mcp_tools_to_memory())
mcp_tools = self.add_mcp_tools_to_memory()
if self.verbose:
logger.info(f"MCP tools: {mcp_tools}")
tools_list.extend(mcp_tools)
# Additional arguments for LiteLLM initialization
additional_args = {}
@ -888,37 +899,37 @@ class Agent:
Exception: If there's an error accessing the MCP tools
"""
try:
# Determine which MCP configuration to use
if exists(self.mcp_url):
tools = get_mcp_tools_sync(server_path=self.mcp_url)
elif exists(self.mcp_config):
tools = get_mcp_tools_sync(connection=self.mcp_config)
# logger.info(f"Tools: {tools}")
elif exists(self.mcp_urls):
logger.info(
f"Getting MCP tools for multiple MCP servers for {self.agent_name}"
)
tools = get_tools_for_multiple_mcp_servers(
urls=self.mcp_urls,
output_type="str",
)
# print(f"Tools: {tools} for {self.mcp_urls}")
if self.verbose:
logger.info(f"MCP tools: {tools}")
else:
raise AgentMCPConnectionError(
"mcp_url must be either a string URL or MCPConnection object"
)
if (
exists(self.mcp_url)
or exists(self.mcp_urls)
or exists(self.mcp_config)
):
if self.print_on is True:
self.pretty_print(
f"✨ [SYSTEM] Successfully integrated {len(tools)} MCP tools into agent: {self.agent_name} | Status: ONLINE | Time: {time.strftime('%H:%M:%S')}",
loop_count=0,
)
# Print success message if any MCP configuration exists
if self.print_on:
self.pretty_print(
f"✨ [SYSTEM] Successfully integrated {len(tools)} MCP tools into agent: {self.agent_name} | Status: ONLINE | Time: {time.strftime('%H:%M:%S')}",
loop_count=0,
)
return tools
except AgentMCPConnectionError as e:
logger.error(
f"Error in MCP connection: {e} Traceback: {traceback.format_exc()}"
f"Error Adding MCP Tools to Agent: {self.agent_name} Error: {e} Traceback: {traceback.format_exc()}"
)
raise e
@ -2653,6 +2664,7 @@ class Agent:
imgs: Optional[List[str]] = None,
correct_answer: Optional[str] = None,
streaming_callback: Optional[Callable[[str], None]] = None,
n: int = 1,
*args,
**kwargs,
) -> Any:
@ -2697,6 +2709,8 @@ class Agent:
)
elif exists(self.handoffs):
output = self.handle_handoffs(task=task)
elif n > 1:
output = [self.run(task=task) for _ in range(n)]
else:
output = self._run(
task=task,
@ -2717,65 +2731,22 @@ class Agent:
Exception,
) as e:
# Try fallback models if available
if (
self.is_fallback_available()
and self.switch_to_next_model()
):
# Always log fallback events, regardless of verbose setting
if self.verbose:
logger.warning(
f"⚠️ [FALLBACK] Agent '{self.agent_name}' failed with model '{self.get_current_model()}'. "
f"Switching to fallback model '{self.get_current_model()}' (attempt {self.current_model_index + 1}/{len(self.get_available_models())})"
)
try:
# Recursive call to run() with the new model
result = self.run(
task=task,
img=img,
imgs=imgs,
correct_answer=correct_answer,
streaming_callback=streaming_callback,
*args,
**kwargs,
)
if self.verbose:
# Log successful completion with fallback model
logger.info(
f"✅ [FALLBACK SUCCESS] Agent '{self.agent_name}' successfully completed task "
f"using fallback model '{self.get_current_model()}'"
)
return result
except Exception as fallback_error:
logger.error(
f"Fallback model '{self.get_current_model()}' also failed: {fallback_error}"
)
# Continue to next fallback or raise if no more models
if (
self.is_fallback_available()
and self.switch_to_next_model()
):
return self.run(
task=task,
img=img,
imgs=imgs,
correct_answer=correct_answer,
streaming_callback=streaming_callback,
*args,
**kwargs,
)
else:
if self.verbose:
logger.error(
f"❌ [FALLBACK EXHAUSTED] Agent '{self.agent_name}' has exhausted all available models. "
f"Tried {len(self.get_available_models())} models: {self.get_available_models()}"
)
self._handle_run_error(e)
if self.is_fallback_available():
return self._handle_fallback_execution(
task=task,
img=img,
imgs=imgs,
correct_answer=correct_answer,
streaming_callback=streaming_callback,
original_error=e,
*args,
**kwargs,
)
else:
if self.verbose:
# No fallback available
logger.error(
f"❌ [NO FALLBACK] Agent '{self.agent_name}' failed with model '{self.get_current_model()}' "
f"Agent Name: {self.agent_name} [NO FALLBACK] failed with model '{self.get_current_model()}' "
f"and no fallback models are configured. Error: {str(e)[:100]}{'...' if len(str(e)) > 100 else ''}"
)
@ -2783,13 +2754,111 @@ class Agent:
except KeyboardInterrupt:
logger.warning(
f"Keyboard interrupt detected for agent '{self.agent_name}'. "
f"Agent Name: {self.agent_name} Keyboard interrupt detected. "
"If autosave is enabled, the agent's state will be saved to the workspace directory. "
"To enable autosave, please initialize the agent with Agent(autosave=True)."
"For technical support, refer to this document: https://docs.swarms.world/en/latest/swarms/support/"
)
raise KeyboardInterrupt
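# Usage sketch for the `n` parameter added to run() in this change: when n > 1 the
# agent re-runs the same task n times and returns a list of outputs (with the default
# n=1 behavior unchanged). Example, assuming an already-constructed `agent`:
#
# samples = agent.run(task="Give one creative name for a telescope.", n=3)
# # samples -> list of 3 independent completions of the same task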
def _handle_fallback_execution(
self,
task: Optional[Union[str, Any]] = None,
img: Optional[str] = None,
imgs: Optional[List[str]] = None,
correct_answer: Optional[str] = None,
streaming_callback: Optional[Callable[[str], None]] = None,
original_error: Exception = None,
*args,
**kwargs,
) -> Any:
"""
Handles fallback execution when the primary model fails.
This method attempts to execute the task using fallback models when the primary
model encounters an error. It will try each available fallback model in sequence
until either the task succeeds or all fallback models are exhausted.
Args:
task (Optional[Union[str, Any]], optional): The task to be executed. Defaults to None.
img (Optional[str], optional): The image to be processed. Defaults to None.
imgs (Optional[List[str]], optional): The list of images to be processed. Defaults to None.
correct_answer (Optional[str], optional): The correct answer for continuous run mode. Defaults to None.
streaming_callback (Optional[Callable[[str], None]], optional): Callback function to receive streaming tokens in real-time. Defaults to None.
original_error (Exception): The original error that triggered the fallback. Defaults to None.
*args: Additional positional arguments to be passed to the execution method.
**kwargs: Additional keyword arguments to be passed to the execution method.
Returns:
Any: The result of the execution if successful.
Raises:
Exception: If all fallback models fail or no fallback models are available.
"""
# Check if fallback models are available
if not self.is_fallback_available():
if self.verbose:
logger.error(
f"Agent Name: {self.agent_name} [NO FALLBACK] failed with model '{self.get_current_model()}' "
f"and no fallback models are configured. Error: {str(original_error)[:100]}{'...' if len(str(original_error)) > 100 else ''}"
)
self._handle_run_error(original_error)
return None
# Try to switch to the next fallback model
if not self.switch_to_next_model():
if self.verbose:
logger.error(
f"Agent Name: {self.agent_name} [FALLBACK EXHAUSTED] has exhausted all available models. "
f"Tried {len(self.get_available_models())} models: {self.get_available_models()}"
)
self._handle_run_error(original_error)
return None
# Log fallback attempt
if self.verbose:
logger.warning(
f"Agent Name: {self.agent_name} [FALLBACK] failed with model '{self.get_current_model()}'. "
f"Switching to fallback model '{self.get_current_model()}' (attempt {self.current_model_index + 1}/{len(self.get_available_models())})"
)
try:
# Recursive call to run() with the new model
result = self.run(
task=task,
img=img,
imgs=imgs,
correct_answer=correct_answer,
streaming_callback=streaming_callback,
*args,
**kwargs,
)
if self.verbose:
# Log successful completion with fallback model
logger.info(
f"Agent Name: {self.agent_name} [FALLBACK SUCCESS] successfully completed task "
f"using fallback model '{self.get_current_model()}'"
)
return result
except Exception as fallback_error:
logger.error(
f"Agent Name: {self.agent_name} Fallback model '{self.get_current_model()}' also failed: {fallback_error}"
)
# Try the next fallback model recursively
return self._handle_fallback_execution(
task=task,
img=img,
imgs=imgs,
correct_answer=correct_answer,
streaming_callback=streaming_callback,
original_error=original_error,
*args,
**kwargs,
)
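# A minimal sketch of how the fallback path above might be exercised, assuming the
# Agent accepts a `fallback_models` list at construction time (the parameter name is
# an assumption for illustration; check the Agent __init__ for the exact argument):
#
# agent = Agent(
#     agent_name="Resilient-Agent",
#     model_name="gpt-4o-mini",  # primary model
#     fallback_models=["gpt-4.1", "claude-3-5-sonnet-20240620"],  # tried in order on failure
#     max_loops=1,
# )
# # If the primary model raises, run() delegates to _handle_fallback_execution(),
# # which switches models and retries until one succeeds or the list is exhausted.
# out = agent.run(task="Summarize the research plan in three bullet points.")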
def run_batched(
self,
tasks: List[str],

@ -1,87 +1,146 @@
from typing import List, Optional
import math
from typing import Any, Callable, List, Optional, Union
from litellm import embedding
from tenacity import retry, stop_after_attempt, wait_exponential
from typing import Union, Callable, Any
from swarms import Agent
from swarms.utils.loguru_logger import initialize_logger
from swarms.utils.auto_download_check_packages import (
auto_check_and_download_package,
)
from swarms.structs.omni_agent_types import AgentType
from swarms.utils.loguru_logger import initialize_logger
logger = initialize_logger(log_folder="agent_router")
class AgentRouter:
"""
Initialize the AgentRouter.
Initialize the AgentRouter using LiteLLM embeddings for agent matching.
Args:
collection_name (str): Name of the collection in the vector database.
persist_directory (str): Directory to persist the vector database.
embedding_model (str): The embedding model to use for generating embeddings.
Examples: 'text-embedding-ada-002', 'text-embedding-3-small', 'text-embedding-3-large',
'cohere/embed-english-v3.0', 'huggingface/microsoft/codebert-base', etc.
n_agents (int): Number of agents to return in queries.
*args: Additional arguments to pass to the chromadb Client.
**kwargs: Additional keyword arguments to pass to the chromadb Client.
api_key (str, optional): API key for the embedding service. If not provided,
will use environment variables.
api_base (str, optional): Custom API base URL for the embedding service.
agents (List[AgentType], optional): List of agents to initialize the router with.
"""
def __init__(
self,
collection_name: str = "agents",
persist_directory: str = "./vector_db",
embedding_model: str = "text-embedding-ada-002",
n_agents: int = 1,
*args,
**kwargs,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
agents: Optional[List[AgentType]] = None,
):
self.embedding_model = embedding_model
self.n_agents = n_agents
self.api_key = api_key
self.api_base = api_base
self.agents: List[AgentType] = []
self.agent_embeddings: List[List[float]] = []
self.agent_metadata: List[dict] = []
# Add agents if provided during initialization
if agents:
self.add_agents(agents)
def _generate_embedding(self, text: str) -> List[float]:
"""
Generate embedding for the given text using the specified model.
Args:
text (str): The text to generate embedding for.
Returns:
List[float]: The embedding vector as a list of floats.
"""
try:
import chromadb
except ImportError:
auto_check_and_download_package(
"chromadb", package_manager="pip", upgrade=True
)
import chromadb
# Prepare parameters for the embedding call
params = {"model": self.embedding_model, "input": [text]}
self.collection_name = collection_name
self.n_agents = n_agents
self.persist_directory = persist_directory
self.client = chromadb.Client(*args, **kwargs)
self.collection = self.client.create_collection(
collection_name
)
self.agents: List[Agent] = []
if self.api_key:
params["api_key"] = self.api_key
if self.api_base:
params["api_base"] = self.api_base
response = embedding(**params)
# Extract the embedding from the response
embedding_vector = response.data[0].embedding
return embedding_vector
except Exception as e:
logger.error(f"Error generating embedding: {str(e)}")
raise
def _cosine_similarity(
self, vec1: List[float], vec2: List[float]
) -> float:
"""
Calculate cosine similarity between two vectors.
Args:
vec1 (List[float]): First vector.
vec2 (List[float]): Second vector.
Returns:
float: Cosine similarity between the vectors.
"""
if len(vec1) != len(vec2):
raise ValueError("Vectors must have the same length")
# Calculate dot product
dot_product = sum(a * b for a, b in zip(vec1, vec2))
# Calculate magnitudes
magnitude1 = math.sqrt(sum(a * a for a in vec1))
magnitude2 = math.sqrt(sum(a * a for a in vec2))
# Avoid division by zero
if magnitude1 == 0 or magnitude2 == 0:
return 0.0
return dot_product / (magnitude1 * magnitude2)
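# Worked example of the cosine similarity above (pure Python, no external deps):
# vec1 = [1.0, 0.0, 1.0], vec2 = [1.0, 1.0, 0.0]
# dot_product = 1*1 + 0*1 + 1*0 = 1.0
# magnitude1 = sqrt(2), magnitude2 = sqrt(2)
# similarity = 1.0 / 2.0 = 0.5
# Identical vectors give 1.0, orthogonal vectors give 0.0, and a zero vector
# short-circuits to 0.0 to avoid division by zero.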
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
)
def add_agent(self, agent: Agent) -> None:
def add_agent(self, agent: AgentType) -> None:
"""
Add an agent to the vector database.
Add an agent to the embedding-based agent router.
Args:
agent (Agent): The agent to add.
Raises:
Exception: If there's an error adding the agent to the vector database.
Exception: If there's an error adding the agent to the router.
"""
try:
agent_text = f"{agent.name} {agent.description} {agent.system_prompt}"
self.collection.add(
documents=[agent_text],
metadatas=[{"name": agent.name}],
ids=[agent.name],
)
# Generate embedding for the agent
agent_embedding = self._generate_embedding(agent_text)
# Store agent and its embedding
self.agents.append(agent)
self.agent_embeddings.append(agent_embedding)
self.agent_metadata.append(
{"name": agent.name, "text": agent_text}
)
logger.info(
f"Added agent {agent.name} to the vector database."
f"Added agent {agent.name} to the embedding-based router."
)
except Exception as e:
logger.error(
f"Error adding agent {agent.name} to the vector database: {str(e)}"
f"Error adding agent {agent.name} to the router: {str(e)}"
)
raise
def add_agents(
self, agents: List[Union[Agent, Callable, Any]]
self, agents: List[Union[AgentType, Callable, Any]]
) -> None:
"""
Add multiple agents to the embedding-based agent router.
@ -94,7 +153,7 @@ class AgentRouter:
def update_agent_history(self, agent_name: str) -> None:
"""
Update the agent's entry in the vector database with its interaction history.
Update the agent's entry in the router with its interaction history.
Args:
agent_name (str): The name of the agent to update.
@ -107,17 +166,39 @@ class AgentRouter:
history_text = " ".join(history)
updated_text = f"{agent.name} {agent.description} {agent.system_prompt} {history_text}"
self.collection.update(
ids=[agent_name],
documents=[updated_text],
metadatas=[{"name": agent_name}],
)
logger.info(
f"Updated agent {agent_name} with interaction history."
# Find the agent's index
agent_index = next(
(
i
for i, a in enumerate(self.agents)
if a.name == agent_name
),
None,
)
if agent_index is not None:
# Generate new embedding with updated text
updated_embedding = self._generate_embedding(
updated_text
)
# Update the stored data
self.agent_embeddings[agent_index] = updated_embedding
self.agent_metadata[agent_index] = {
"name": agent_name,
"text": updated_text,
}
logger.info(
f"Updated agent {agent_name} with interaction history."
)
else:
logger.warning(
f"Agent {agent_name} not found in the agents list."
)
else:
logger.warning(
f"Agent {agent_name} not found in the database."
f"Agent {agent_name} not found in the router."
)
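# A small usage sketch for the embedding-based router (the Agent fields shown use the
# usual swarms constructor arguments and are illustrative; adjust to your agents):
#
# router = AgentRouter(embedding_model="text-embedding-3-small", n_agents=1)
# router.add_agents(
#     [
#         Agent(agent_name="SQL-Analyst", system_prompt="You write and optimize SQL.", model_name="gpt-4o-mini"),
#         Agent(agent_name="Copywriter", system_prompt="You write marketing copy.", model_name="gpt-4o-mini"),
#     ]
# )
# best = router.find_best_agent("Draft a landing page headline for a new product")
# # Expected to route to the copywriting agent via cosine similarity over embeddings.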
@retry(
@ -126,14 +207,14 @@ class AgentRouter:
)
def find_best_agent(
self, task: str, *args, **kwargs
) -> Optional[Agent]:
) -> Optional[AgentType]:
"""
Find the best agent for a given task.
Find the best agent for a given task using cosine similarity.
Args:
task (str): The task description.
*args: Additional arguments to pass to the collection.query method.
**kwargs: Additional keyword arguments to pass to the collection.query method.
*args: Additional arguments (unused, kept for compatibility).
**kwargs: Additional keyword arguments (unused, kept for compatibility).
Returns:
Optional[Agent]: The best matching agent, if found.
@ -142,32 +223,32 @@ class AgentRouter:
Exception: If there's an error finding the best agent.
"""
try:
results = self.collection.query(
query_texts=[task],
n_results=self.n_agents,
*args,
**kwargs,
)
if not self.agents or not self.agent_embeddings:
logger.warning("No agents available in the router.")
return None
# Generate embedding for the task
task_embedding = self._generate_embedding(task)
# Calculate cosine similarities
similarities = []
for agent_embedding in self.agent_embeddings:
similarity = self._cosine_similarity(
task_embedding, agent_embedding
)
similarities.append(similarity)
# Find the best matching agent(s)
if similarities:
# Get index of the best similarity
best_index = similarities.index(max(similarities))
best_agent = self.agents[best_index]
best_similarity = similarities[best_index]
if results["ids"]:
best_match_name = results["ids"][0][0]
best_agent = next(
(
a
for a in self.agents
if a.name == best_match_name
),
None,
logger.info(
f"Found best matching agent: {best_agent.name} (similarity: {best_similarity:.4f})"
)
if best_agent:
logger.info(
f"Found best matching agent: {best_match_name}"
)
return best_agent
else:
logger.warning(
f"Agent {best_match_name} found in index but not in agents list."
)
return best_agent
else:
logger.warning(
"No matching agent found for the given task."

@ -1,4 +1,5 @@
import asyncio
import socket
import sys
import threading
import time
@ -556,6 +557,7 @@ class AOP:
3. Handle tool execution with proper error handling
4. Manage the MCP server lifecycle
5. Queue-based task execution for improved performance and reliability
6. Persistence mode with automatic restart and failsafe protection
Attributes:
mcp_server: The FastMCP server instance
@ -564,6 +566,13 @@ class AOP:
task_queues: Dictionary mapping tool names to their task queues
server_name: Name of the MCP server
queue_enabled: Whether queue-based execution is enabled
persistence: Whether persistence mode is enabled
max_restart_attempts: Maximum number of restart attempts before giving up
restart_delay: Delay between restart attempts in seconds
network_monitoring: Whether network connection monitoring is enabled
max_network_retries: Maximum number of network reconnection attempts
network_retry_delay: Delay between network retry attempts in seconds
network_timeout: Network connection timeout in seconds
"""
def __init__(
@ -581,6 +590,13 @@ class AOP:
max_queue_size_per_agent: int = 1000,
processing_timeout: int = 30,
retry_delay: float = 1.0,
persistence: bool = False,
max_restart_attempts: int = 10,
restart_delay: float = 5.0,
network_monitoring: bool = True,
max_network_retries: int = 5,
network_retry_delay: float = 10.0,
network_timeout: float = 30.0,
log_level: Literal[
"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
] = "INFO",
@ -605,6 +621,13 @@ class AOP:
max_queue_size_per_agent: Maximum queue size per agent
processing_timeout: Timeout for task processing in seconds
retry_delay: Delay between retries in seconds
persistence: Enable automatic restart on shutdown (with failsafe)
max_restart_attempts: Maximum number of restart attempts before giving up
restart_delay: Delay between restart attempts in seconds
network_monitoring: Enable network connection monitoring and retry
max_network_retries: Maximum number of network reconnection attempts
network_retry_delay: Delay between network retry attempts in seconds
network_timeout: Network connection timeout in seconds
"""
self.server_name = server_name
self.description = description
@ -618,6 +641,23 @@ class AOP:
self.max_queue_size_per_agent = max_queue_size_per_agent
self.processing_timeout = processing_timeout
self.retry_delay = retry_delay
self.persistence = persistence
self.max_restart_attempts = max_restart_attempts
self.restart_delay = restart_delay
self.network_monitoring = network_monitoring
self.max_network_retries = max_network_retries
self.network_retry_delay = network_retry_delay
self.network_timeout = network_timeout
# Persistence state tracking
self._restart_count = 0
self._persistence_enabled = persistence
self._shutdown_requested = False
# Network state tracking
self._network_retry_count = 0
self._last_network_error = None
self._network_connected = True
self.agents: Dict[str, Agent] = {}
self.tool_configs: Dict[str, AgentToolConfig] = {}
@ -641,7 +681,7 @@ class AOP:
)
logger.info(
f"Initialized AOP with server name: {server_name}, verbose: {verbose}, traceback: {traceback_enabled}"
f"Initialized AOP with server name: {server_name}, verbose: {verbose}, traceback: {traceback_enabled}, persistence: {persistence}, network_monitoring: {network_monitoring}"
)
# Add initial agents if provided
@ -2262,9 +2302,397 @@ class AOP:
def run(self) -> None:
"""
Run the MCP server.
Run the MCP server with optional persistence.
If persistence is enabled, the server will automatically restart
when stopped, up to max_restart_attempts times. This includes
a failsafe mechanism to prevent infinite restart loops.
"""
if not self._persistence_enabled:
# Standard run without persistence
self.start_server()
return
# Persistence-enabled run
logger.info(
f"Starting AOP server with persistence enabled (max restarts: {self.max_restart_attempts})"
)
while (
not self._shutdown_requested
and self._restart_count <= self.max_restart_attempts
):
try:
if self._restart_count > 0:
logger.info(
f"Restarting server (attempt {self._restart_count}/{self.max_restart_attempts})"
)
# Wait before restarting
time.sleep(self.restart_delay)
# Reset restart count on successful start
self._restart_count = 0
self.start_server()
except KeyboardInterrupt:
if (
self._persistence_enabled
and not self._shutdown_requested
):
logger.warning(
"Server interrupted by user, but persistence is enabled. Restarting..."
)
self._restart_count += 1
continue
else:
logger.info("Server shutdown requested by user")
break
except Exception as e:
if (
self._persistence_enabled
and not self._shutdown_requested
):
# Check if it's a network error
if self._is_network_error(e):
logger.warning(
"🌐 Network error detected, attempting reconnection..."
)
if self._handle_network_error(e):
# Network retry successful, continue with restart
self._restart_count += 1
continue
else:
# Network retry failed, give up
logger.critical(
"💀 Network reconnection failed permanently"
)
break
else:
# Non-network error, use standard restart logic
logger.error(
f"Server crashed with error: {e}"
)
self._restart_count += 1
if (
self._restart_count
> self.max_restart_attempts
):
logger.critical(
f"Maximum restart attempts ({self.max_restart_attempts}) exceeded. Shutting down permanently."
)
break
else:
logger.info(
f"Will restart in {self.restart_delay} seconds..."
)
continue
else:
# Check if it's a network error even without persistence
if self._is_network_error(e):
logger.error(
"🌐 Network error detected but persistence is disabled"
)
if self.network_monitoring:
logger.info(
"🔄 Attempting network reconnection..."
)
if self._handle_network_error(e):
# Try to start server again after network recovery
try:
self.start_server()
return
except Exception as retry_error:
logger.error(
f"Server failed after network recovery: {retry_error}"
)
raise
else:
logger.critical(
"💀 Network reconnection failed"
)
raise
else:
logger.error(
"Network monitoring is disabled, cannot retry"
)
raise
else:
logger.error(
f"Server failed and persistence is disabled: {e}"
)
raise
if self._restart_count > self.max_restart_attempts:
logger.critical(
"Server failed permanently due to exceeding maximum restart attempts"
)
elif self._shutdown_requested:
logger.info("Server shutdown completed as requested")
else:
logger.info("Server stopped normally")
def _is_network_error(self, error: Exception) -> bool:
"""
Check if an error is network-related.
Args:
error: The exception to check
Returns:
bool: True if the error is network-related
"""
network_errors = (
ConnectionError,
ConnectionRefusedError,
ConnectionResetError,
ConnectionAbortedError,
TimeoutError,
socket.gaierror,
socket.timeout,
OSError,
)
# Check if it's a direct network error
if isinstance(error, network_errors):
return True
# Check error message for network-related keywords
error_msg = str(error).lower()
network_keywords = [
"connection refused",
"connection reset",
"connection aborted",
"network is unreachable",
"no route to host",
"timeout",
"socket",
"network",
"connection",
"refused",
"reset",
"aborted",
"unreachable",
"timeout",
]
return any(
keyword in error_msg for keyword in network_keywords
)
def _get_network_error_message(
self, error: Exception, attempt: int
) -> str:
"""
Get a custom error message for network-related errors.
Args:
error: The network error that occurred
attempt: Current retry attempt number
Returns:
str: Custom error message
"""
error_type = type(error).__name__
error_msg = str(error)
if isinstance(error, ConnectionRefusedError):
return f"🌐 NETWORK ERROR: Connection refused to {self.host}:{self.port} (attempt {attempt}/{self.max_network_retries})"
elif isinstance(error, ConnectionResetError):
return f"🌐 NETWORK ERROR: Connection was reset by remote host (attempt {attempt}/{self.max_network_retries})"
elif isinstance(error, ConnectionAbortedError):
return f"🌐 NETWORK ERROR: Connection was aborted (attempt {attempt}/{self.max_network_retries})"
elif isinstance(error, TimeoutError):
return f"🌐 NETWORK ERROR: Connection timeout after {self.network_timeout}s (attempt {attempt}/{self.max_network_retries})"
elif isinstance(error, socket.gaierror):
return f"🌐 NETWORK ERROR: Host resolution failed for {self.host} (attempt {attempt}/{self.max_network_retries})"
elif isinstance(error, OSError):
return f"🌐 NETWORK ERROR: OS-level network error - {error_msg} (attempt {attempt}/{self.max_network_retries})"
else:
return f"🌐 NETWORK ERROR: {error_type} - {error_msg} (attempt {attempt}/{self.max_network_retries})"
def _test_network_connectivity(self) -> bool:
"""
Test network connectivity to the server host and port.
Returns:
bool: True if network is reachable, False otherwise
"""
try:
# Test if we can resolve the host
socket.gethostbyname(self.host)
# Test if we can connect to the port
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(self.network_timeout)
result = sock.connect_ex((self.host, self.port))
sock.close()
return result == 0
except Exception as e:
if self.verbose:
logger.debug(f"Network connectivity test failed: {e}")
return False
def _handle_network_error(self, error: Exception) -> bool:
"""
Handle network errors with retry logic.
Args:
error: The network error that occurred
Returns:
bool: True if should retry, False if should give up
"""
if not self.network_monitoring:
return False
self._network_retry_count += 1
self._last_network_error = error
self._network_connected = False
# Get custom error message
error_msg = self._get_network_error_message(
error, self._network_retry_count
)
logger.error(error_msg)
# Check if we should retry
if self._network_retry_count <= self.max_network_retries:
logger.warning(
f"🔄 Attempting to reconnect in {self.network_retry_delay} seconds..."
)
logger.info(
f"📊 Network retry {self._network_retry_count}/{self.max_network_retries}"
)
# Wait before retry
time.sleep(self.network_retry_delay)
# Test connectivity before retry
if self._test_network_connectivity():
logger.info("✅ Network connectivity restored!")
self._network_connected = True
self._network_retry_count = (
0 # Reset on successful test
)
return True
else:
logger.warning(
"❌ Network connectivity test failed, will retry..."
)
return True
else:
logger.critical(
f"💀 Maximum network retry attempts ({self.max_network_retries}) exceeded!"
)
logger.critical(
"🚫 Giving up on network reconnection. Server will shut down."
)
return False
def get_network_status(self) -> Dict[str, Any]:
"""
Get current network status and statistics.
Returns:
Dict containing network status information
"""
return {
"network_monitoring_enabled": self.network_monitoring,
"network_connected": self._network_connected,
"network_retry_count": self._network_retry_count,
"max_network_retries": self.max_network_retries,
"network_retry_delay": self.network_retry_delay,
"network_timeout": self.network_timeout,
"last_network_error": (
str(self._last_network_error)
if self._last_network_error
else None
),
"remaining_network_retries": max(
0,
self.max_network_retries - self._network_retry_count,
),
"host": self.host,
"port": self.port,
}
def reset_network_retry_count(self) -> None:
"""
Reset the network retry counter.
This can be useful if you want to give the server a fresh
set of network retry attempts.
"""
self._network_retry_count = 0
self._last_network_error = None
self._network_connected = True
logger.info("Network retry counter reset")
def enable_persistence(self) -> None:
"""
Enable persistence mode for the server.
This allows the server to automatically restart when stopped,
up to the maximum number of restart attempts.
"""
self._persistence_enabled = True
logger.info("Persistence mode enabled")
def disable_persistence(self) -> None:
"""
Disable persistence mode for the server.
This will allow the server to shut down normally without
automatic restarts.
"""
self._persistence_enabled = False
self._shutdown_requested = True
logger.info(
"Persistence mode disabled - server will shut down on next stop"
)
def request_shutdown(self) -> None:
"""
Request a graceful shutdown of the server.
If persistence is enabled, this will prevent automatic restarts
and allow the server to shut down normally.
"""
self._shutdown_requested = True
logger.info(
"Shutdown requested - server will stop after current operations complete"
)
def get_persistence_status(self) -> Dict[str, Any]:
"""
Get the current persistence status and statistics.
Returns:
Dict containing persistence configuration and status
"""
return {
"persistence_enabled": self._persistence_enabled,
"shutdown_requested": self._shutdown_requested,
"restart_count": self._restart_count,
"max_restart_attempts": self.max_restart_attempts,
"restart_delay": self.restart_delay,
"remaining_restarts": max(
0, self.max_restart_attempts - self._restart_count
),
}
def reset_restart_count(self) -> None:
"""
Reset the restart counter.
This can be useful if you want to give the server a fresh
set of restart attempts.
"""
self.start_server()
self._restart_count = 0
logger.info("Restart counter reset")
def get_server_info(self) -> Dict[str, Any]:
"""
@ -2283,6 +2711,8 @@ class AOP:
"log_level": self.log_level,
"transport": self.transport,
"queue_enabled": self.queue_enabled,
"persistence": self.get_persistence_status(),
"network": self.get_network_status(),
"tool_details": {
tool_name: self.get_agent_info(tool_name)
for tool_name in self.agents.keys()

@ -1,6 +1,5 @@
import concurrent.futures
import datetime
import inspect
import json
import os
import traceback
@ -95,6 +94,7 @@ class Conversation:
export_method: str = "json",
dynamic_context_window: bool = True,
caching: bool = True,
output_metadata: bool = False,
):
# Initialize all attributes first
@ -118,6 +118,7 @@ class Conversation:
self.export_method = export_method
self.dynamic_context_window = dynamic_context_window
self.caching = caching
self.output_metadata = output_metadata
if self.name is None:
self.name = id
@ -534,7 +535,7 @@ class Conversation:
"""
return self.return_history_as_string()
def to_dict(self) -> Dict[str, Any]:
def to_dict(self) -> Dict[Any, Any]:
"""
Converts all attributes of the class into a dictionary, including all __init__ parameters
and conversation history. Automatically extracts parameters from __init__ signature.
@ -544,43 +545,7 @@ class Conversation:
- metadata: All initialization parameters and their current values
- conversation_history: The list of conversation messages
"""
# Get all parameters from __init__ signature
init_signature = inspect.signature(self.__class__.__init__)
init_params = [
param
for param in init_signature.parameters
if param not in ["self", "args", "kwargs"]
]
# Build metadata dictionary from init parameters
metadata = {}
for param in init_params:
# Get the current value of the parameter from instance
value = getattr(self, param, None)
# Special handling for certain types
if value is not None:
if isinstance(
value, (str, int, float, bool, list, dict)
):
metadata[param] = value
elif hasattr(value, "to_dict"):
metadata[param] = value.to_dict()
else:
try:
# Try to convert to string if not directly serializable
metadata[param] = str(value)
except Exception:
# Skip if we can't serialize
continue
# Add created_at if it exists
if hasattr(self, "created_at"):
metadata["created_at"] = self.created_at
return {
"metadata": metadata,
"conversation_history": self.conversation_history,
}
return self.conversation_history
def save_as_json(self, force: bool = True):
"""Save the conversation history and metadata to a JSON file.
@ -597,14 +562,11 @@ class Conversation:
)
return
# Get the full data including metadata and conversation history
data = self.get_init_params()
# Ensure we have a valid save path
if not self.save_filepath:
self.save_filepath = os.path.join(
self.conversations_dir or os.getcwd(),
f"conversation_{self.name}.json",
f"conversation_{self.id}.json",
)
# Create directory if it doesn't exist
@ -614,7 +576,12 @@ class Conversation:
# Save with proper formatting
with open(self.save_filepath, "w", encoding="utf-8") as f:
json.dump(data, f, indent=4, default=str)
json.dump(
self.conversation_history,
f,
indent=4,
default=str,
)
logger.info(f"Conversation saved to {self.save_filepath}")
@ -624,34 +591,6 @@ class Conversation:
)
raise # Re-raise to ensure the error is visible to the caller
def get_init_params(self):
data = {
"metadata": {
"id": self.id,
"name": self.name,
"system_prompt": self.system_prompt,
"time_enabled": self.time_enabled,
"autosave": self.autosave,
"save_filepath": self.save_filepath,
"load_filepath": self.load_filepath,
"context_length": self.context_length,
"rules": self.rules,
"custom_rules_prompt": self.custom_rules_prompt,
"user": self.user,
"save_as_yaml_on": self.save_as_yaml_on,
"save_as_json_bool": self.save_as_json_bool,
"token_count": self.token_count,
"message_id_on": self.message_id_on,
"tokenizer_model_name": self.tokenizer_model_name,
"conversations_dir": self.conversations_dir,
"export_method": self.export_method,
"created_at": self.created_at,
},
"conversation_history": self.conversation_history,
}
return data
def save_as_yaml(self, force: bool = True):
"""Save the conversation history and metadata to a YAML file.
@ -667,9 +606,6 @@ class Conversation:
)
return
# Get the full data including metadata and conversation history
data = self.get_init_params()
# Create directory if it doesn't exist
save_dir = os.path.dirname(self.save_filepath)
if save_dir:
@ -678,7 +614,7 @@ class Conversation:
# Save with proper formatting
with open(self.save_filepath, "w", encoding="utf-8") as f:
yaml.dump(
data,
self.conversation_history,
f,
indent=4,
default_flow_style=False,

@ -7,9 +7,6 @@ import schedule
from loguru import logger
# from swarms import Agent
class CronJobError(Exception):
"""Base exception class for CronJob errors."""

@ -1,253 +0,0 @@
from swarms.structs.agent import Agent
from typing import List
from swarms.structs.conversation import Conversation
import uuid
import random
from loguru import logger
from typing import Optional
class QASwarm:
"""
A Question and Answer swarm system where random agents ask questions to speaker agents.
This system allows for dynamic Q&A sessions where:
- Multiple agents can act as questioners
- One or multiple agents can act as speakers/responders
- Questions are asked randomly by different agents
- The conversation is tracked and managed
- Agents are showcased to each other with detailed information
"""
def __init__(
self,
name: str = "QandA",
description: str = "Question and Answer Swarm System",
agents: List[Agent] = None,
speaker_agents: List[Agent] = None,
id: str = str(uuid.uuid4()),
max_loops: int = 5,
show_dashboard: bool = True,
speaker_agent: Agent = None,
showcase_agents: bool = True,
**kwargs,
):
self.id = id
self.name = name
self.description = description
self.max_loops = max_loops
self.show_dashboard = show_dashboard
self.agents = agents or []
self.speaker_agents = speaker_agents or []
self.kwargs = kwargs
self.speaker_agent = speaker_agent
self.showcase_agents = showcase_agents
self.conversation = Conversation()
# Validate setup
self._validate_setup()
def _validate_setup(self):
"""Validate that the Q&A system is properly configured."""
if not self.agents:
logger.warning(
"No questioner agents provided. Add agents using add_agent() method."
)
if not self.speaker_agents and not self.speaker_agent:
logger.warning(
"No speaker agents provided. Add speaker agents using add_speaker_agent() method."
)
if (
not self.agents
and not self.speaker_agents
and not self.speaker_agent
):
raise ValueError(
"At least one agent (questioner or speaker) must be provided."
)
def add_agent(self, agent: Agent):
"""Add a questioner agent to the swarm."""
self.agents.append(agent)
logger.info(f"Added questioner agent: {agent.agent_name}")
def add_speaker_agent(self, agent: Agent):
"""Add a speaker agent to the swarm."""
if self.speaker_agents is None:
self.speaker_agents = []
self.speaker_agents.append(agent)
logger.info(f"Added speaker agent: {agent.agent_name}")
def get_agent_info(self, agent: Agent) -> dict:
"""Extract key information about an agent for showcasing."""
info = {
"name": getattr(agent, "agent_name", "Unknown Agent"),
"description": getattr(
agent, "agent_description", "No description available"
),
"role": getattr(agent, "role", "worker"),
}
# Get system prompt preview (first 50 characters)
system_prompt = getattr(agent, "system_prompt", "")
if system_prompt:
info["system_prompt_preview"] = (
system_prompt[:50] + "..."
if len(system_prompt) > 50
else system_prompt
)
else:
info["system_prompt_preview"] = (
"No system prompt available"
)
return info
def showcase_speaker_to_questioner(
self, questioner: Agent, speaker: Agent
) -> str:
"""Create a showcase prompt introducing the speaker agent to the questioner."""
speaker_info = self.get_agent_info(speaker)
showcase_prompt = f"""
You are about to ask a question to a specialized agent. Here's what you need to know about them:
**Speaker Agent Information:**
- **Name**: {speaker_info['name']}
- **Role**: {speaker_info['role']}
- **Description**: {speaker_info['description']}
- **System Prompt Preview**: {speaker_info['system_prompt_preview']}
Please craft a thoughtful, relevant question that takes into account this agent's expertise and background.
Your question should be specific and demonstrate that you understand their role and capabilities.
"""
return showcase_prompt
def showcase_questioner_to_speaker(
self, speaker: Agent, questioner: Agent
) -> str:
"""Create a showcase prompt introducing the questioner agent to the speaker."""
questioner_info = self.get_agent_info(questioner)
showcase_prompt = f"""
You are about to answer a question from another agent. Here's what you need to know about them:
**Questioner Agent Information:**
- **Name**: {questioner_info['name']}
- **Role**: {questioner_info['role']}
- **Description**: {questioner_info['description']}
- **System Prompt Preview**: {questioner_info['system_prompt_preview']}
Please provide a comprehensive answer that demonstrates your expertise and addresses their question thoroughly.
Consider their background and role when formulating your response.
"""
return showcase_prompt
def random_select_agent(self, agents: List[Agent]) -> Agent:
"""Randomly select an agent from the list."""
if not agents:
raise ValueError("No agents available for selection")
return random.choice(agents)
def get_current_speaker(self) -> Agent:
"""Get the current speaker agent (either from speaker_agents list or single speaker_agent)."""
if self.speaker_agent:
return self.speaker_agent
elif self.speaker_agents:
return self.random_select_agent(self.speaker_agents)
else:
raise ValueError("No speaker agent available")
def run(
self, task: str, img: Optional[str] = None, *args, **kwargs
):
"""Run the Q&A session with agent showcasing."""
self.conversation.add(role="user", content=task)
# Get current speaker
current_speaker = self.get_current_speaker()
# Select a random questioner
questioner = self.random_select_agent(self.agents)
# Showcase agents to each other if enabled
if self.showcase_agents:
# Showcase speaker to questioner
speaker_showcase = self.showcase_speaker_to_questioner(
questioner, current_speaker
)
questioner_task = f"{speaker_showcase}\n\nNow ask a question about: {task}"
# Showcase questioner to speaker
questioner_showcase = self.showcase_questioner_to_speaker(
current_speaker, questioner
)
else:
questioner_task = f"Ask a question about {task} to {current_speaker.agent_name}"
# Generate question
question = questioner.run(
task=questioner_task,
img=img,
*args,
**kwargs,
)
self.conversation.add(
role=questioner.agent_name, content=question
)
# Prepare answer task with showcasing if enabled
if self.showcase_agents:
answer_task = f"{questioner_showcase}\n\nAnswer this question from {questioner.agent_name}: {question}"
else:
answer_task = f"Answer the question '{question}' from {questioner.agent_name}"
# Generate answer
answer = current_speaker.run(
task=answer_task,
img=img,
*args,
**kwargs,
)
self.conversation.add(
role=current_speaker.agent_name, content=answer
)
return answer
def run_multi_round(
self,
task: str,
rounds: int = 3,
img: Optional[str] = None,
*args,
**kwargs,
):
"""Run multiple rounds of Q&A with different questioners."""
results = []
for round_num in range(rounds):
logger.info(
f"Starting Q&A round {round_num + 1}/{rounds}"
)
round_result = self.run(task, img, *args, **kwargs)
results.append(
{"round": round_num + 1, "result": round_result}
)
return results
def get_conversation_history(self):
"""Get the conversation history."""
return self.conversation.get_history()
def clear_conversation(self):
"""Clear the conversation history."""
self.conversation = Conversation()
logger.info("Conversation history cleared")

@ -1,15 +1,7 @@
"""
Social Algorithms for Multi-Agent Communication
This module provides a flexible framework for defining custom social algorithms
that control how agents communicate and interact with each other in multi-agent systems.
"""
import time
import uuid
from typing import Any, Callable, Dict, List, Optional
from dataclasses import dataclass
from enum import Enum
from swarms.structs.agent import Agent
from swarms.structs.omni_agent_types import AgentType
@ -19,18 +11,6 @@ from swarms.utils.output_types import OutputType
logger = initialize_logger(log_folder="social_algorithms")
class SocialAlgorithmType(Enum):
"""Types of social algorithms supported."""
CUSTOM = "custom"
SEQUENTIAL = "sequential"
CONCURRENT = "concurrent"
HIERARCHICAL = "hierarchical"
MESH = "mesh"
ROUND_ROBIN = "round_robin"
BROADCAST = "broadcast"
@dataclass
class CommunicationStep:
"""Represents a single step in a social algorithm."""

@ -1,191 +0,0 @@
from pydantic.v1 import BaseModel
from typing import List, Callable
from swarms.utils.loguru_logger import initialize_logger
logger = initialize_logger(log_folder="swarm_registry")
class SwarmRegistry(BaseModel):
swarm_pool: List[Callable] = []
def add(self, swarm: Callable, *args, **kwargs):
"""
Adds a swarm to the registry.
Args:
swarm (Callable): The swarm to add to the registry.
"""
self.swarm_pool.append(swarm, *args, **kwargs)
def query(self, swarm_name: str) -> Callable:
"""
Queries the registry for a swarm by name.
Args:
swarm_name (str): The name of the swarm to query.
Returns:
Callable: The swarm function corresponding to the given name.
"""
if not self.swarm_pool:
raise ValueError("No swarms found in registry")
if not swarm_name:
raise ValueError("No swarm name provided.")
for swarm in self.swarm_pool:
if swarm.__name__ == swarm_name:
name = swarm.__name__
description = (
swarm.__doc__.strip().split("\n")[0]
or swarm.description
)
agent_count = len(swarm.agents)
task_count = len(swarm.tasks)
log = f"Swarm: {name}\nDescription: {description}\nAgents: {agent_count}\nTasks: {task_count}"
logger.info(log)
return swarm
raise ValueError(
f"Swarm '{swarm_name}' not found in registry."
)
def remove(self, swarm_name: str):
"""
Removes a swarm from the registry by name.
Args:
swarm_name (str): The name of the swarm to remove.
"""
for swarm in self.swarm_pool:
if swarm.__name__ == swarm_name:
self.swarm_pool.remove(swarm)
return
raise ValueError(
f"Swarm '{swarm_name}' not found in registry."
)
def list_swarms(self) -> List[str]:
"""
Lists the names of all swarms in the registry.
Returns:
List[str]: A list of swarm names.
"""
if not self.swarm_pool:
raise ValueError("No swarms found in registry.")
for swarm in self.swarm_pool:
name = swarm.__name__
description = (
swarm.__doc__.strip().split("\n")[0]
or swarm.description
)
agent_count = len(swarm.agents)
task_count = len(swarm.tasks)
log = f"Swarm: {name}\nDescription: {description}\nAgents: {agent_count}\nTasks: {task_count}"
logger.info(log)
return [swarm.__name__ for swarm in self.swarm_pool]
def run(self, swarm_name: str, *args, **kwargs):
"""
Runs a swarm by name with the given arguments.
Args:
swarm_name (str): The name of the swarm to run.
*args: Variable length argument list.
**kwargs: Arbitrary keyword arguments.
Returns:
Any: The result of running the swarm.
"""
swarm = self.query(swarm_name)
return swarm(*args, **kwargs)
def add_list_of_swarms(self, swarms: List[Callable]):
"""
Adds a list of swarms to the registry.
Args:
swarms (List[Callable]): A list of swarms to add to the registry.
"""
for swarm in swarms:
self.add(swarm)
return self.swarm_pool
def query_multiple_of_swarms(
self, swarm_names: List[str]
) -> List[Callable]:
"""
Queries the registry for multiple swarms by name.
Args:
swarm_names (List[str]): A list of swarm names to query.
Returns:
List[Callable]: A list of swarm functions corresponding to the given names.
"""
return [self.query(swarm_name) for swarm_name in swarm_names]
def remove_list_of_swarms(self, swarm_names: List[str]):
"""
Removes a list of swarms from the registry by name.
Args:
swarm_names (List[str]): A list of swarm names to remove.
"""
for swarm_name in swarm_names:
self.remove(swarm_name)
return self.swarm_pool
def run_multiple_of_swarms(
self, swarm_names: List[str], *args, **kwargs
):
"""
Runs a list of swarms by name with the given arguments.
Args:
swarm_names (List[str]): A list of swarm names to run.
*args: Variable length argument list.
**kwargs: Arbitrary keyword arguments.
Returns:
List[Any]: A list of results of running the swarms.
"""
return [
self.run(swarm_name, *args, **kwargs)
for swarm_name in swarm_names
]
# Decorator to add a function to the registry
def swarm_registry():
"""
Decorator to add a function to the registry.
Args:
swarm_registry (SwarmRegistry): The swarm registry instance.
Returns:
Callable: The decorated function.
"""
def decorator(func, *args, **kwargs):
try:
swarm_registry = SwarmRegistry()
swarm_registry.add(func, *args, **kwargs)
logger.info(
f"Added swarm '{func.__name__}' to the registry."
)
return func
except Exception as e:
logger.error(str(e))
raise
return decorator

@ -546,8 +546,6 @@ class SwarmRouter:
description=self.description,
agents=self.agents,
max_loops=self.max_loops,
auto_save=self.autosave,
return_str_on=self.return_entire_history,
output_type=self.output_type,
*args,
**kwargs,

@ -1,9 +1,8 @@
import uuid
from collections import Counter
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, List, Optional
import numpy as np
from litellm import embedding
from pydantic import BaseModel, Field
@ -14,6 +13,47 @@ from swarms.utils.loguru_logger import initialize_logger
logger = initialize_logger(log_folder="tree_swarm")
def extract_keywords(prompt: str, top_n: int = 5) -> List[str]:
"""
A simplified keyword extraction function using basic word splitting instead of NLTK tokenization.
Args:
prompt (str): The text prompt to extract keywords from
top_n (int): Maximum number of keywords to return
Returns:
List[str]: List of extracted keywords
"""
words = prompt.lower().split()
filtered_words = [word for word in words if word.isalnum()]
word_counts = Counter(filtered_words)
return [word for word, _ in word_counts.most_common(top_n)]
def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
"""
Calculate cosine similarity between two vectors.
Args:
vec1 (List[float]): First vector
vec2 (List[float]): Second vector
Returns:
float: Cosine similarity score between 0 and 1
"""
# Calculate dot product
dot_product = sum(a * b for a, b in zip(vec1, vec2))
# Calculate norms
norm1 = sum(a * a for a in vec1) ** 0.5
norm2 = sum(b * b for b in vec2) ** 0.5
if norm1 == 0 or norm2 == 0:
return 0.0
return dot_product / (norm1 * norm2)
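# Quick illustration of the two helpers above:
# extract_keywords("design a fast fast parser for log files", top_n=2)
# -> ["fast", ...]  # most frequent words first after lowercasing and splitting
# cosine_similarity([1.0, 0.0], [0.0, 1.0]) -> 0.0   (orthogonal vectors)
# cosine_similarity([1.0, 2.0], [2.0, 4.0]) -> 1.0   (parallel vectors)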
# Pydantic Models for Logging
class AgentLogInput(BaseModel):
"""
@ -32,7 +72,7 @@ class AgentLogInput(BaseModel):
agent_name: str
task: str
timestamp: datetime = Field(
default_factory=lambda: datetime.now(datetime.UTC)
default_factory=lambda: datetime.now(timezone.utc)
)
@ -53,7 +93,7 @@ class AgentLogOutput(BaseModel):
agent_name: str
result: Any
timestamp: datetime = Field(
default_factory=lambda: datetime.now(datetime.UTC)
default_factory=lambda: datetime.now(timezone.utc)
)
@ -77,52 +117,11 @@ class TreeLog(BaseModel):
task: str
selected_agent: str
timestamp: datetime = Field(
default_factory=lambda: datetime.now(datetime.UTC)
default_factory=lambda: datetime.now(timezone.utc)
)
result: Any
def extract_keywords(prompt: str, top_n: int = 5) -> List[str]:
"""
A simplified keyword extraction function using basic word splitting instead of NLTK tokenization.
Args:
prompt (str): The text prompt to extract keywords from
top_n (int): Maximum number of keywords to return
Returns:
List[str]: List of extracted keywords
"""
words = prompt.lower().split()
filtered_words = [word for word in words if word.isalnum()]
word_counts = Counter(filtered_words)
return [word for word, _ in word_counts.most_common(top_n)]
def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
"""
Calculate cosine similarity between two vectors.
Args:
vec1 (List[float]): First vector
vec2 (List[float]): Second vector
Returns:
float: Cosine similarity score between 0 and 1
"""
vec1 = np.array(vec1)
vec2 = np.array(vec2)
dot_product = np.dot(vec1, vec2)
norm1 = np.linalg.norm(vec1)
norm2 = np.linalg.norm(vec2)
if norm1 == 0 or norm2 == 0:
return 0.0
return dot_product / (norm1 * norm2)
class TreeAgent(Agent):
"""
A specialized Agent class that contains information about the system prompt's
@ -137,6 +136,7 @@ class TreeAgent(Agent):
model_name: str = "gpt-4.1",
agent_name: Optional[str] = None,
embedding_model_name: str = "text-embedding-ada-002",
verbose: bool = False,
*args,
**kwargs,
):
@ -150,6 +150,7 @@ class TreeAgent(Agent):
model_name (str): Name of the language model to use
agent_name (Optional[str]): Alternative name for the agent
embedding_model_name (str): Name of the embedding model to use
verbose (bool): Whether to enable verbose logging
*args: Additional positional arguments
**kwargs: Additional keyword arguments
"""
@ -164,6 +165,7 @@ class TreeAgent(Agent):
**kwargs,
)
self.embedding_model_name = embedding_model_name
self.verbose = verbose
# Generate system prompt embedding using litellm
if system_prompt:
@ -195,7 +197,8 @@ class TreeAgent(Agent):
response = embedding(
model=self.embedding_model_name, input=[text]
)
logger.info(f"Embedding type: {type(response)}")
if self.verbose:
logger.info(f"Embedding type: {type(response)}")
# print(response)
# Handle different response structures from litellm
if hasattr(response, "data") and response.data:
@ -207,17 +210,20 @@ class TreeAgent(Agent):
):
return response.data[0]["embedding"]
else:
logger.error(
f"Unexpected response structure: {response.data[0]}"
)
if self.verbose:
logger.error(
f"Unexpected response structure: {response.data[0]}"
)
return [0.0] * 1536
else:
logger.error(
f"Unexpected response structure: {response}"
)
if self.verbose:
logger.error(
f"Unexpected response structure: {response}"
)
return [0.0] * 1536
except Exception as e:
logger.error(f"Error getting embedding: {e}")
if self.verbose:
logger.error(f"Error getting embedding: {e}")
# Return a zero vector as fallback
return [0.0] * 1536 # Default OpenAI embedding dimension
@ -264,20 +270,24 @@ class TreeAgent(Agent):
input_log = AgentLogInput(
agent_name=self.agent_name,
task=task,
timestamp=datetime.now(),
timestamp=datetime.now(timezone.utc),
)
logger.info(f"Running task on {self.agent_name}: {task}")
logger.debug(f"Input Log: {input_log.json()}")
if self.verbose:
logger.info(f"Running task on {self.agent_name}: {task}")
logger.debug(f"Input Log: {input_log.json()}")
result = self.run(task=task, img=img, *args, **kwargs)
output_log = AgentLogOutput(
agent_name=self.agent_name,
result=result,
timestamp=datetime.now(),
timestamp=datetime.now(timezone.utc),
)
logger.info(f"Task result from {self.agent_name}: {result}")
logger.debug(f"Output Log: {output_log.json()}")
if self.verbose:
logger.info(
f"Task result from {self.agent_name}: {result}"
)
logger.debug(f"Output Log: {output_log.json()}")
return result
@ -306,25 +316,36 @@ class TreeAgent(Agent):
similarity = cosine_similarity(
self.system_prompt_embedding, task_embedding
)
logger.info(
f"Semantic similarity between task and {self.agent_name}: {similarity:.2f}"
)
if self.verbose:
logger.info(
f"Semantic similarity between task and {self.agent_name}: {similarity:.2f}"
)
return similarity >= threshold
return True # Return True if keyword match is found
class Tree:
def __init__(self, tree_name: str, agents: List[TreeAgent]):
def __init__(
self,
tree_name: str,
agents: List[TreeAgent],
verbose: bool = False,
):
"""
Initializes a tree of agents.
Args:
tree_name (str): The name of the tree.
agents (List[TreeAgent]): A list of agents in the tree.
verbose (bool): Whether to enable verbose logging
"""
self.tree_name = tree_name
self.agents = agents
self.verbose = verbose
# Pass verbose to all agents
for agent in self.agents:
agent.verbose = verbose
self.calculate_agent_distances()
def calculate_agent_distances(self):
@ -334,9 +355,10 @@ class Tree:
This method computes the semantic distance between consecutive agents using their system prompt
embeddings and sorts the agents by distance for optimal task routing.
"""
logger.info(
f"Calculating distances between agents in tree '{self.tree_name}'"
)
if self.verbose:
logger.info(
f"Calculating distances between agents in tree '{self.tree_name}'"
)
for i, agent in enumerate(self.agents):
if i > 0:
agent.distance = agent.calculate_distance(
@ -359,15 +381,17 @@ class Tree:
Returns:
Optional[TreeAgent]: The most relevant agent, or None if no match found.
"""
logger.info(
f"Searching relevant agent in tree '{self.tree_name}' for task: {task}"
)
if self.verbose:
logger.info(
f"Searching relevant agent in tree '{self.tree_name}' for task: {task}"
)
for agent in self.agents:
if agent.is_relevant_for_task(task):
return agent
logger.warning(
f"No relevant agent found in tree '{self.tree_name}' for task: {task}"
)
if self.verbose:
logger.warning(
f"No relevant agent found in tree '{self.tree_name}' for task: {task}"
)
return None
def log_tree_execution(
@ -380,13 +404,14 @@ class Tree:
tree_name=self.tree_name,
task=task,
selected_agent=selected_agent.agent_name,
timestamp=datetime.now(),
timestamp=datetime.now(timezone.utc),
result=result,
)
logger.info(
f"Tree '{self.tree_name}' executed task with agent '{selected_agent.agent_name}'"
)
logger.debug(f"Tree Log: {tree_log.json()}")
if self.verbose:
logger.info(
f"Tree '{self.tree_name}' executed task with agent '{selected_agent.agent_name}'"
)
logger.debug(f"Tree Log: {tree_log.json()}")
class ForestSwarm:
@ -397,6 +422,7 @@ class ForestSwarm:
trees: List[Tree] = [],
shared_memory: Any = None,
rules: str = None,
verbose: bool = False,
*args,
**kwargs,
):
@ -409,6 +435,7 @@ class ForestSwarm:
trees (List[Tree]): A list of trees in the structure
shared_memory (Any): Shared memory object for inter-tree communication
rules (str): Rules governing the forest swarm behavior
verbose (bool): Whether to enable verbose logging
*args: Additional positional arguments
**kwargs: Additional keyword arguments
"""
@ -416,10 +443,13 @@ class ForestSwarm:
self.description = description
self.trees = trees
self.shared_memory = shared_memory
self.verbose = verbose
# Pass verbose to all trees
for tree in self.trees:
tree.verbose = verbose
self.save_file_path = f"forest_swarm_{uuid.uuid4().hex}.json"
self.conversation = Conversation(
time_enabled=False,
auto_save=True,
save_filepath=self.save_file_path,
rules=rules,
)
@ -434,13 +464,15 @@ class ForestSwarm:
Returns:
Optional[Tree]: The most relevant tree, or None if no match found
"""
logger.info(
f"Searching for the most relevant tree for task: {task}"
)
if self.verbose:
logger.info(
f"Searching for the most relevant tree for task: {task}"
)
for tree in self.trees:
if tree.find_relevant_agent(task):
return tree
logger.warning(f"No relevant tree found for task: {task}")
if self.verbose:
logger.warning(f"No relevant tree found for task: {task}")
return None
def run(self, task: str, img: str = None, *args, **kwargs) -> Any:
@ -457,9 +489,10 @@ class ForestSwarm:
Any: The result of the task after it has been processed by the agents
"""
try:
logger.info(
f"Running task across MultiAgentTreeStructure: {task}"
)
if self.verbose:
logger.info(
f"Running task across MultiAgentTreeStructure: {task}"
)
relevant_tree = self.find_relevant_tree(task)
if relevant_tree:
agent = relevant_tree.find_relevant_agent(task)
@ -472,14 +505,32 @@ class ForestSwarm:
)
return result
else:
logger.error(
"Task could not be completed: No relevant agent or tree found."
)
if self.verbose:
logger.error(
"Task could not be completed: No relevant agent or tree found."
)
return "No relevant agent found to handle this task."
except Exception as error:
logger.error(
f"Error detected in the ForestSwarm, check your inputs and try again ;) {error}"
)
if self.verbose:
logger.error(
f"Error detected in the ForestSwarm, check your inputs and try again ;) {error}"
)
def batched_run(
self,
tasks: List[str],
*args,
**kwargs,
) -> List[Any]:
"""
Execute the given tasks by finding the most relevant tree and agent within that tree.
Args:
tasks (List[str]): The tasks to be executed
*args: Additional positional arguments
**kwargs: Additional keyword arguments
Returns:
List[Any]: The results of running each task, in order.
"""
return [self.run(task, *args, **kwargs) for task in tasks]
# # Example Usage:
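A minimal end-to-end sketch; the prompts, names, and task are illustrative, and it assumes the TreeAgent constructor accepts system_prompt and agent_name as shown in its signature above.
finance_agent = TreeAgent(
    system_prompt="You analyze personal finance questions.",
    agent_name="Finance-Agent",
)
tax_agent = TreeAgent(
    system_prompt="You answer tax-planning questions.",
    agent_name="Tax-Agent",
)
forest = ForestSwarm(
    trees=[Tree(tree_name="Money-Tree", agents=[finance_agent, tax_agent])],
    verbose=True,
)
print(forest.run("How should I prioritize a Roth IRA versus a 401(k)?"))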

@ -1,8 +1,4 @@
from swarms.tools.base_tool import BaseTool
from swarms.tools.cohere_func_call_schema import (
CohereFuncSchema,
ParameterDefinition,
)
from swarms.tools.json_utils import base_model_to_json
from swarms.tools.mcp_client_tools import (
_create_server_tool_mapping,
@ -56,8 +52,6 @@ __all__ = [
"ToolFunction",
"tool",
"BaseTool",
"CohereFuncSchema",
"ParameterDefinition",
"ToolStorage",
"tool_registry",
"base_model_to_json",

@ -1,18 +0,0 @@
from pydantic import BaseModel, Field
from typing import Dict
class ParameterDefinition(BaseModel):
description: str = Field(
..., title="Description of the parameter"
)
type: str = Field(..., title="Type of the parameter")
required: bool = Field(..., title="Is the parameter required?")
class CohereFuncSchema(BaseModel):
name: str = Field(..., title="Name of the tool")
description: str = Field(..., title="Description of the tool")
parameter_definitions: Dict[str, ParameterDefinition] = Field(
..., title="Parameter definitions for the tool"
)

@ -64,6 +64,7 @@ class MCPExecutionError(MCPError):
########################################################
def transform_mcp_tool_to_openai_tool(
mcp_tool: MCPTool,
verbose: bool = False,
) -> ChatCompletionToolParam:
"""
Convert an MCP tool to an OpenAI tool.
@ -72,9 +73,11 @@ def transform_mcp_tool_to_openai_tool(
Returns:
ChatCompletionToolParam: The OpenAI-compatible tool parameter.
"""
logger.info(
f"Transforming MCP tool '{mcp_tool.name}' to OpenAI tool format."
)
if verbose:
logger.info(
f"Transforming MCP tool '{mcp_tool.name}' to OpenAI tool format."
)
return ChatCompletionToolParam(
type="function",
function=FunctionDefinition(
@ -529,12 +532,15 @@ def get_tools_for_multiple_mcp_servers(
logger.info(
f"get_tools_for_multiple_mcp_servers called for {len(urls)} urls."
)
tools = []
# Default the worker count when the caller does not provide one, then cap it.
max_workers = (
    min(32, os.cpu_count() + 4)
    if max_workers is None
    else max_workers
)
with ThreadPoolExecutor(max_workers=max_workers) as executor:
if exists(connections):
future_to_url = {

@ -1,9 +1,9 @@
import json
from typing import List, Any, Callable
import re
from typing import Any, Callable, List
from swarms.utils.parse_code import extract_code_from_markdown
from swarms.utils.loguru_logger import initialize_logger
from swarms.utils.parse_code import extract_code_from_markdown
logger = initialize_logger(log_folder="tool_parse_exec")

@ -1,343 +0,0 @@
import base64
from typing import Union, Dict, Any, Tuple
import requests
from pathlib import Path
import wave
import numpy as np
def encode_audio_to_base64(audio_path: Union[str, Path]) -> str:
"""
Encode a WAV file to base64 string.
Args:
audio_path (Union[str, Path]): Path to the WAV file
Returns:
str: Base64 encoded string of the audio file
Raises:
FileNotFoundError: If the audio file doesn't exist
ValueError: If the file is not a valid WAV file
"""
try:
audio_path = Path(audio_path)
if not audio_path.exists():
raise FileNotFoundError(
f"Audio file not found: {audio_path}"
)
if not audio_path.suffix.lower() == ".wav":
raise ValueError("File must be a WAV file")
with open(audio_path, "rb") as audio_file:
audio_data = audio_file.read()
return base64.b64encode(audio_data).decode("utf-8")
except Exception as e:
raise Exception(f"Error encoding audio file: {str(e)}")
def decode_base64_to_audio(
base64_string: str, output_path: Union[str, Path]
) -> None:
"""
Decode a base64 string to a WAV file.
Args:
base64_string (str): Base64 encoded audio data
output_path (Union[str, Path]): Path where the WAV file should be saved
Raises:
ValueError: If the base64 string is invalid
IOError: If there's an error writing the file
"""
try:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
audio_data = base64.b64decode(base64_string)
with open(output_path, "wb") as audio_file:
audio_file.write(audio_data)
except Exception as e:
raise Exception(f"Error decoding audio data: {str(e)}")
def download_audio_from_url(
url: str, output_path: Union[str, Path]
) -> None:
"""
Download an audio file from a URL and save it locally.
Args:
url (str): URL of the audio file
output_path (Union[str, Path]): Path where the audio file should be saved
Raises:
requests.RequestException: If there's an error downloading the file
IOError: If there's an error saving the file
"""
try:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
response = requests.get(url)
response.raise_for_status()
with open(output_path, "wb") as audio_file:
audio_file.write(response.content)
except Exception as e:
raise Exception(f"Error downloading audio file: {str(e)}")
def process_audio_with_model(
audio_path: Union[str, Path],
model: str,
prompt: str,
voice: str = "alloy",
format: str = "wav",
) -> Dict[str, Any]:
"""
Process an audio file with a model that supports audio input/output.
Args:
audio_path (Union[str, Path]): Path to the input WAV file
model (str): Model name to use for processing
prompt (str): Text prompt to accompany the audio
voice (str, optional): Voice to use for audio output. Defaults to "alloy"
format (str, optional): Audio format. Defaults to "wav"
Returns:
Dict[str, Any]: Model response containing both text and audio if applicable
Raises:
ImportError: If litellm is not installed
ValueError: If the model doesn't support audio processing
"""
try:
from litellm import (
completion,
supports_audio_input,
supports_audio_output,
)
if not supports_audio_input(model):
raise ValueError(
f"Model {model} does not support audio input"
)
# Encode the audio file
encoded_audio = encode_audio_to_base64(audio_path)
# Prepare the messages
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "input_audio",
"input_audio": {
"data": encoded_audio,
"format": format,
},
},
],
}
]
# Make the API call
response = completion(
model=model,
modalities=["text", "audio"],
audio={"voice": voice, "format": format},
messages=messages,
)
return response
except ImportError:
raise ImportError(
"Please install litellm: pip install litellm"
)
except Exception as e:
raise Exception(
f"Error processing audio with model: {str(e)}"
)
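A hedged usage sketch for the helper above; the model name and file path are illustrative, and the call requires a litellm-supported model with audio input enabled plus valid API credentials.
# response = process_audio_with_model(
#     "question.wav",
#     model="gpt-4o-audio-preview",
#     prompt="Transcribe the question and answer it briefly.",
# )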
def read_wav_file(
file_path: Union[str, Path],
) -> Tuple[np.ndarray, int]:
"""
Read a WAV file and return its audio data and sample rate.
Args:
file_path (Union[str, Path]): Path to the WAV file
Returns:
Tuple[np.ndarray, int]: Audio data as numpy array and sample rate
Raises:
FileNotFoundError: If the file doesn't exist
ValueError: If the file is not a valid WAV file
"""
try:
file_path = Path(file_path)
if not file_path.exists():
raise FileNotFoundError(
f"Audio file not found: {file_path}"
)
with wave.open(str(file_path), "rb") as wav_file:
# Get audio parameters
n_channels = wav_file.getnchannels()
sample_width = wav_file.getsampwidth()
frame_rate = wav_file.getframerate()
n_frames = wav_file.getnframes()
# Read audio data
frames = wav_file.readframes(n_frames)
# Convert to numpy array
dtype = np.int16 if sample_width == 2 else np.int8
audio_data = np.frombuffer(frames, dtype=dtype)
# Reshape if stereo
if n_channels == 2:
audio_data = audio_data.reshape(-1, 2)
return audio_data, frame_rate
except Exception as e:
raise Exception(f"Error reading WAV file: {str(e)}")
def write_wav_file(
audio_data: np.ndarray,
file_path: Union[str, Path],
sample_rate: int,
sample_width: int = 2,
) -> None:
"""
Write audio data to a WAV file.
Args:
audio_data (np.ndarray): Audio data as numpy array
file_path (Union[str, Path]): Path where to save the WAV file
sample_rate (int): Sample rate of the audio
sample_width (int, optional): Sample width in bytes. Defaults to 2 (16-bit)
Raises:
ValueError: If the audio data is invalid
IOError: If there's an error writing the file
"""
try:
file_path = Path(file_path)
file_path.parent.mkdir(parents=True, exist_ok=True)
# Ensure audio data is in the correct format
if audio_data.dtype != np.int16 and sample_width == 2:
audio_data = (audio_data * 32767).astype(np.int16)
elif audio_data.dtype != np.int8 and sample_width == 1:
audio_data = (audio_data * 127).astype(np.int8)
# Determine number of channels
n_channels = (
2
if len(audio_data.shape) > 1 and audio_data.shape[1] == 2
else 1
)
with wave.open(str(file_path), "wb") as wav_file:
wav_file.setnchannels(n_channels)
wav_file.setsampwidth(sample_width)
wav_file.setframerate(sample_rate)
wav_file.writeframes(audio_data.tobytes())
except Exception as e:
raise Exception(f"Error writing WAV file: {str(e)}")
def normalize_audio(audio_data: np.ndarray) -> np.ndarray:
"""
Normalize audio data to have maximum amplitude of 1.0.
Args:
audio_data (np.ndarray): Input audio data
Returns:
np.ndarray: Normalized audio data
"""
return audio_data / np.max(np.abs(audio_data))
def convert_to_mono(audio_data: np.ndarray) -> np.ndarray:
"""
Convert stereo audio to mono by averaging channels.
Args:
audio_data (np.ndarray): Input audio data (stereo)
Returns:
np.ndarray: Mono audio data
"""
if len(audio_data.shape) == 1:
return audio_data
return np.mean(audio_data, axis=1)
def encode_wav_to_base64(
audio_data: np.ndarray, sample_rate: int
) -> str:
"""
Convert audio data to base64 encoded WAV string.
Args:
audio_data (np.ndarray): Audio data
sample_rate (int): Sample rate of the audio
Returns:
str: Base64 encoded WAV data
"""
# Write the audio data to a temporary WAV file on disk
with wave.open("temp.wav", "wb") as wav_file:
wav_file.setnchannels(1 if len(audio_data.shape) == 1 else 2)
wav_file.setsampwidth(2) # 16-bit
wav_file.setframerate(sample_rate)
wav_file.writeframes(audio_data.tobytes())
# Read the file and encode to base64
with open("temp.wav", "rb") as f:
wav_bytes = f.read()
# Clean up temporary file
Path("temp.wav").unlink()
return base64.b64encode(wav_bytes).decode("utf-8")
def decode_base64_to_wav(
base64_string: str,
) -> Tuple[np.ndarray, int]:
"""
Convert base64 encoded WAV string to audio data and sample rate.
Args:
base64_string (str): Base64 encoded WAV data
Returns:
Tuple[np.ndarray, int]: Audio data and sample rate
"""
# Decode base64 string
wav_bytes = base64.b64decode(base64_string)
# Write to temporary file
with open("temp.wav", "wb") as f:
f.write(wav_bytes)
# Read the WAV file
audio_data, sample_rate = read_wav_file("temp.wav")
# Clean up temporary file
Path("temp.wav").unlink()
return audio_data, sample_rate
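A round-trip sketch tying the helpers above together; the file paths are illustrative.
# audio, rate = read_wav_file("speech.wav")
# mono = convert_to_mono(normalize_audio(audio))
# write_wav_file(mono, "speech_mono.wav", rate)
# b64 = encode_audio_to_base64("speech_mono.wav")
# decode_base64_to_audio(b64, "speech_copy.wav")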

@ -1,151 +0,0 @@
"""
Package installation utility that checks for package existence and installs if needed.
Supports both pip and conda package managers.
"""
import importlib.util
import subprocess
import sys
from typing import Literal, Optional, Union
from swarms.utils.loguru_logger import initialize_logger
from importlib.metadata import distribution, PackageNotFoundError
logger = initialize_logger("autocheckpackages")
def check_and_install_package(
package_name: str,
package_manager: Literal["pip", "conda"] = "pip",
version: Optional[str] = None,
upgrade: bool = False,
) -> bool:
"""
Check if a package is installed and install it if not found.
Args:
package_name: Name of the package to check/install
package_manager: Package manager to use ('pip' or 'conda')
version: Specific version to install (optional)
upgrade: Whether to upgrade the package if it exists
Returns:
bool: True if package is available after check/install, False if installation failed
Raises:
ValueError: If invalid package manager is specified
"""
try:
# Check if package exists
if package_manager == "pip":
try:
distribution(package_name)
if not upgrade:
logger.info(
f"Package {package_name} is already installed"
)
return True
except PackageNotFoundError:
pass
# Construct installation command
cmd = [sys.executable, "-m", "pip", "install"]
if upgrade:
cmd.append("--upgrade")
if version:
cmd.append(f"{package_name}=={version}")
else:
cmd.append(package_name)
elif package_manager == "conda":
# Check if conda is available
try:
subprocess.run(
["conda", "--version"],
check=True,
capture_output=True,
)
except (subprocess.CalledProcessError, FileNotFoundError):
logger.error(
"Conda is not available. Please install conda first."
)
return False
# Construct conda command
cmd = ["conda", "install", "-y"]
if version:
cmd.append(f"{package_name}={version}")
else:
cmd.append(package_name)
else:
raise ValueError(
f"Invalid package manager: {package_manager}"
)
# Run installation
logger.info(f"Installing {package_name}...")
subprocess.run(
cmd, check=True, capture_output=True, text=True
)
# Verify installation
try:
importlib.import_module(package_name)
logger.info(f"Successfully installed {package_name}")
return True
except ImportError:
logger.error(
f"Package {package_name} was installed but cannot be imported"
)
return False
except subprocess.CalledProcessError as e:
logger.error(f"Failed to install {package_name}: {e.stderr}")
return False
except Exception as e:
logger.error(
f"Unexpected error while installing {package_name}: {str(e)}"
)
return False
def auto_check_and_download_package(
packages: Union[str, list[str]],
package_manager: Literal["pip", "conda"] = "pip",
upgrade: bool = False,
) -> bool:
"""
Ensure multiple packages are installed.
Args:
packages: Single package name or list of package names
package_manager: Package manager to use ('pip' or 'conda')
upgrade: Whether to upgrade existing packages
Returns:
bool: True if all packages are available, False if any installation failed
"""
if isinstance(packages, str):
packages = [packages]
success = True
for package in packages:
if ":" in package:
name, version = package.split(":")
if not check_and_install_package(
name, package_manager, version, upgrade
):
success = False
else:
if not check_and_install_package(
package, package_manager, upgrade=upgrade
):
success = False
return success
# if __name__ == "__main__":
# print(auto_check_and_download_package("torch"))
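A further usage sketch; package names and versions are illustrative, and the name:version form follows the ':' split handled above.
# check_and_install_package("requests", package_manager="pip", upgrade=True)
# auto_check_and_download_package(["requests", "numpy:1.26.4"])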

@ -1,88 +0,0 @@
# Best LLM Models by Task Type
# Simplified dictionary structure with model names and categories
best_models = {
"Vision": [
{"model": "gemini/gemini-2.5-pro", "category": "Vision"},
],
"text-generation": [
{
"model": "claude-sonnet-4-20250514",
"category": "text-generation",
},
{"model": "gpt-5-chat", "category": "text-generation"},
],
}
# Function to get all models for a task type
def get_models_by_task(task_type: str) -> list:
"""
Get all models for a specific task type.
Args:
task_type (str): The task category (e.g., 'WebDev', 'Vision', 'text-generation')
Returns:
list: List of all models for the task type
"""
if task_type not in best_models:
raise ValueError(
f"Task type '{task_type}' not found. Available types: {list(best_models.keys())}"
)
return best_models[task_type]
# Function to get the first model for a task type (simplified from get_top_model)
def get_first_model(task_type: str) -> dict:
"""
Get the first model for a specific task type.
Args:
task_type (str): The task category (e.g., 'WebDev', 'Vision', 'text-generation')
Returns:
dict: First model information with model name and category
"""
if task_type not in best_models:
raise ValueError(
f"Task type '{task_type}' not found. Available types: {list(best_models.keys())}"
)
models = best_models[task_type]
if not models:
raise ValueError(
f"No models found for task type '{task_type}'"
)
return models[0]
# Function to search for a specific model across all categories
def find_model_by_name(model_name: str) -> dict:
"""
Find a model by name across all task categories.
Args:
model_name (str): The model name to search for
Returns:
dict: Model information if found, None otherwise
"""
for task_type, models in best_models.items():
for model in models:
if model["model"].lower() == model_name.lower():
return model
return None
# Function to get all available task types
def get_available_task_types() -> list:
"""
Get all available task types/categories.
Returns:
list: List of all task type names
"""
return list(best_models.keys())
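Sanity-check sketch derived directly from the dictionary above.
# get_first_model("text-generation")
#   -> {"model": "claude-sonnet-4-20250514", "category": "text-generation"}
# find_model_by_name("gpt-5-chat")
#   -> {"model": "gpt-5-chat", "category": "text-generation"}
# get_available_task_types()  -> ["Vision", "text-generation"]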

@ -1,54 +0,0 @@
from typing import Any
from litellm import image_generation
class ImageGenerator:
def __init__(
self,
model: str | None = None,
n: int | None = 2,
quality: Any = None,
response_format: str | None = None,
size: str | None = 10,
style: str | None = None,
user: str | None = None,
input_fidelity: str | None = None,
timeout: int = 600,
output_path_folder: str | None = "images",
api_key: str | None = None,
api_base: str | None = None,
):
self.model = model
self.n = n
self.quality = quality
self.response_format = response_format
self.size = size
self.style = style
self.user = user
self.input_fidelity = input_fidelity
self.timeout = timeout
self.output_path_folder = output_path_folder
self.api_key = api_key
self.api_base = api_base
def run(self, task: str = None):
return image_generation(
prompt=task,
model=self.model,
n=self.n,
quality=self.quality,
response_format=self.response_format,
size=self.size,
style=self.style,
user=self.user,
input_fidelity=self.input_fidelity,
timeout=self.timeout,
)
# if __name__ == "__main__":
# image_generator = ImageGenerator()
# print(image_generator.run(task="A beautiful sunset over a calm ocean"))
# print(model_list)

@ -1,5 +0,0 @@
def litellm_check_for_tools(model_name: str):
"""Check if the model supports tools."""
from litellm.utils import supports_function_calling
return supports_function_calling(model_name)
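Usage sketch; the model name is illustrative.
# litellm_check_for_tools("gpt-4o-mini")  # True if the model supports function calling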

@ -1,114 +0,0 @@
from unittest.mock import MagicMock
import unittest
from swarms.structs.agent import Agent
from swarms.tools.tool_parse_exec import parse_and_execute_json
# Mock parse_and_execute_json for testing
parse_and_execute_json = MagicMock()
parse_and_execute_json.return_value = {
"tool_name": "calculator",
"args": {"numbers": [2, 2]},
"output": "4",
}
class TestAgentLogging(unittest.TestCase):
def setUp(self):
self.mock_tokenizer = MagicMock()
self.mock_tokenizer.count_tokens.return_value = 100
self.mock_short_memory = MagicMock()
self.mock_short_memory.get_memory_stats.return_value = {
"message_count": 2
}
self.mock_long_memory = MagicMock()
self.mock_long_memory.get_memory_stats.return_value = {
"item_count": 5
}
self.agent = Agent(
tokenizer=self.mock_tokenizer,
short_memory=self.mock_short_memory,
long_term_memory=self.mock_long_memory,
)
def test_log_step_metadata_basic(self):
log_result = self.agent.log_step_metadata(
1, "Test prompt", "Test response"
)
self.assertIn("step_id", log_result)
self.assertIn("timestamp", log_result)
self.assertIn("tokens", log_result)
self.assertIn("memory_usage", log_result)
self.assertEqual(log_result["tokens"]["total"], 200)
def test_log_step_metadata_no_long_term_memory(self):
self.agent.long_term_memory = None
log_result = self.agent.log_step_metadata(
1, "prompt", "response"
)
self.assertEqual(log_result["memory_usage"]["long_term"], {})
def test_log_step_metadata_timestamp(self):
log_result = self.agent.log_step_metadata(
1, "prompt", "response"
)
self.assertIn("timestamp", log_result)
def test_token_counting_integration(self):
self.mock_tokenizer.count_tokens.side_effect = [150, 250]
log_result = self.agent.log_step_metadata(
1, "prompt", "response"
)
self.assertEqual(log_result["tokens"]["total"], 400)
def test_agent_output_updating(self):
initial_total_tokens = sum(
step["tokens"]["total"]
for step in self.agent.agent_output.steps
)
self.agent.log_step_metadata(1, "prompt", "response")
final_total_tokens = sum(
step["tokens"]["total"]
for step in self.agent.agent_output.steps
)
self.assertEqual(
final_total_tokens - initial_total_tokens, 200
)
self.assertEqual(len(self.agent.agent_output.steps), 1)
class TestAgentLoggingIntegration(unittest.TestCase):
def setUp(self):
self.agent = Agent(agent_name="test-agent")
def test_full_logging_cycle(self):
task = "Test task"
max_loops = 1
result = self.agent._run(task, max_loops=max_loops)
self.assertIsInstance(result, dict)
self.assertIn("steps", result)
self.assertIsInstance(result["steps"], list)
self.assertEqual(len(result["steps"]), max_loops)
if result["steps"]:
step = result["steps"][0]
self.assertIn("step_id", step)
self.assertIn("timestamp", step)
self.assertIn("task", step)
self.assertIn("response", step)
self.assertEqual(step["task"], task)
self.assertEqual(step["response"], "Response for loop 1")
self.assertTrue(len(self.agent.agent_output.steps) > 0)
if __name__ == "__main__":
unittest.main()

@ -1,267 +0,0 @@
import unittest
from unittest.mock import patch
from swarms import create_agents_from_yaml
import os
class TestCreateAgentsFromYaml(unittest.TestCase):
def setUp(self):
# Mock the environment variable for API key
os.environ["OPENAI_API_KEY"] = "fake-api-key"
# Mock agent configuration YAML content
self.valid_yaml_content = """
agents:
- agent_name: "Financial-Analysis-Agent"
model:
openai_api_key: "fake-api-key"
model_name: "gpt-4o-mini"
temperature: 0.1
max_tokens: 2000
system_prompt: "financial_agent_sys_prompt"
max_loops: 1
autosave: true
dashboard: false
verbose: true
dynamic_temperature_enabled: true
saved_state_path: "finance_agent.json"
user_name: "swarms_corp"
retry_attempts: 1
context_length: 200000
return_step_meta: false
output_type: "str"
task: "How can I establish a ROTH IRA to buy stocks and get a tax break?"
- agent_name: "Stock-Analysis-Agent"
model:
openai_api_key: "fake-api-key"
model_name: "gpt-4o-mini"
temperature: 0.2
max_tokens: 1500
system_prompt: "stock_agent_sys_prompt"
max_loops: 2
autosave: true
dashboard: false
verbose: true
dynamic_temperature_enabled: false
saved_state_path: "stock_agent.json"
user_name: "stock_user"
retry_attempts: 3
context_length: 150000
return_step_meta: true
output_type: "json"
task: "What is the best strategy for long-term stock investment?"
"""
@patch(
"builtins.open",
new_callable=unittest.mock.mock_open,
read_data="",
)
@patch("yaml.safe_load")
def test_create_agents_return_agents(
self, mock_safe_load, mock_open
):
# Mock YAML content parsing
mock_safe_load.return_value = {
"agents": [
{
"agent_name": "Financial-Analysis-Agent",
"model": {
"openai_api_key": "fake-api-key",
"model_name": "gpt-4o-mini",
"temperature": 0.1,
"max_tokens": 2000,
},
"system_prompt": "financial_agent_sys_prompt",
"max_loops": 1,
"autosave": True,
"dashboard": False,
"verbose": True,
"dynamic_temperature_enabled": True,
"saved_state_path": "finance_agent.json",
"user_name": "swarms_corp",
"retry_attempts": 1,
"context_length": 200000,
"return_step_meta": False,
"output_type": "str",
"task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
}
]
}
# Test if agents are returned correctly
agents = create_agents_from_yaml(
"fake_yaml_path.yaml", return_type="agents"
)
self.assertEqual(len(agents), 1)
self.assertEqual(
agents[0].agent_name, "Financial-Analysis-Agent"
)
@patch(
"builtins.open",
new_callable=unittest.mock.mock_open,
read_data="",
)
@patch("yaml.safe_load")
@patch(
"swarms.Agent.run", return_value="Task completed successfully"
)
def test_create_agents_return_tasks(
self, mock_agent_run, mock_safe_load, mock_open
):
# Mock YAML content parsing
mock_safe_load.return_value = {
"agents": [
{
"agent_name": "Financial-Analysis-Agent",
"model": {
"openai_api_key": "fake-api-key",
"model_name": "gpt-4o-mini",
"temperature": 0.1,
"max_tokens": 2000,
},
"system_prompt": "financial_agent_sys_prompt",
"max_loops": 1,
"autosave": True,
"dashboard": False,
"verbose": True,
"dynamic_temperature_enabled": True,
"saved_state_path": "finance_agent.json",
"user_name": "swarms_corp",
"retry_attempts": 1,
"context_length": 200000,
"return_step_meta": False,
"output_type": "str",
"task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
}
]
}
# Test if tasks are executed and results are returned
task_results = create_agents_from_yaml(
"fake_yaml_path.yaml", return_type="tasks"
)
self.assertEqual(len(task_results), 1)
self.assertEqual(
task_results[0]["agent_name"], "Financial-Analysis-Agent"
)
self.assertIsNotNone(task_results[0]["output"])
@patch(
"builtins.open",
new_callable=unittest.mock.mock_open,
read_data="",
)
@patch("yaml.safe_load")
def test_create_agents_return_both(
self, mock_safe_load, mock_open
):
# Mock YAML content parsing
mock_safe_load.return_value = {
"agents": [
{
"agent_name": "Financial-Analysis-Agent",
"model": {
"openai_api_key": "fake-api-key",
"model_name": "gpt-4o-mini",
"temperature": 0.1,
"max_tokens": 2000,
},
"system_prompt": "financial_agent_sys_prompt",
"max_loops": 1,
"autosave": True,
"dashboard": False,
"verbose": True,
"dynamic_temperature_enabled": True,
"saved_state_path": "finance_agent.json",
"user_name": "swarms_corp",
"retry_attempts": 1,
"context_length": 200000,
"return_step_meta": False,
"output_type": "str",
"task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
}
]
}
# Test if both agents and tasks are returned
agents, task_results = create_agents_from_yaml(
"fake_yaml_path.yaml", return_type="both"
)
self.assertEqual(len(agents), 1)
self.assertEqual(len(task_results), 1)
self.assertEqual(
agents[0].agent_name, "Financial-Analysis-Agent"
)
self.assertIsNotNone(task_results[0]["output"])
@patch(
"builtins.open",
new_callable=unittest.mock.mock_open,
read_data="",
)
@patch("yaml.safe_load")
def test_missing_agents_in_yaml(self, mock_safe_load, mock_open):
# Mock YAML content with missing "agents" key
mock_safe_load.return_value = {}
# Test if the function raises an error for missing "agents" key
with self.assertRaises(ValueError) as context:
create_agents_from_yaml(
"fake_yaml_path.yaml", return_type="agents"
)
self.assertTrue(
"The YAML configuration does not contain 'agents'."
in str(context.exception)
)
@patch(
"builtins.open",
new_callable=unittest.mock.mock_open,
read_data="",
)
@patch("yaml.safe_load")
def test_invalid_return_type(self, mock_safe_load, mock_open):
# Mock YAML content parsing
mock_safe_load.return_value = {
"agents": [
{
"agent_name": "Financial-Analysis-Agent",
"model": {
"openai_api_key": "fake-api-key",
"model_name": "gpt-4o-mini",
"temperature": 0.1,
"max_tokens": 2000,
},
"system_prompt": "financial_agent_sys_prompt",
"max_loops": 1,
"autosave": True,
"dashboard": False,
"verbose": True,
"dynamic_temperature_enabled": True,
"saved_state_path": "finance_agent.json",
"user_name": "swarms_corp",
"retry_attempts": 1,
"context_length": 200000,
"return_step_meta": False,
"output_type": "str",
"task": "How can I establish a ROTH IRA to buy stocks and get a tax break?",
}
]
}
# Test if an error is raised for invalid return_type
with self.assertRaises(ValueError) as context:
create_agents_from_yaml(
"fake_yaml_path.yaml", return_type="invalid_type"
)
self.assertTrue(
"Invalid return_type" in str(context.exception)
)
if __name__ == "__main__":
unittest.main()

@ -1,190 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify the LiteLLM initialization fix for combined parameters.
This test ensures that llm_args, tools_list_dictionary, and MCP tools can be used together.
"""
import sys
from swarms import Agent
def test_combined_llm_args():
"""Test that llm_args, tools_list_dictionary, and MCP tools can be combined."""
# Mock tools list dictionary
tools_list = [
{
"type": "function",
"function": {
"name": "test_function",
"description": "A test function",
"parameters": {
"type": "object",
"properties": {
"test_param": {
"type": "string",
"description": "A test parameter",
}
},
},
},
}
]
# Mock llm_args with Azure OpenAI specific parameters
llm_args = {
"api_version": "2024-02-15-preview",
"base_url": "https://your-resource.openai.azure.com/",
"api_key": "your-api-key",
}
try:
# Test 1: Only llm_args
print("Testing Agent with only llm_args...")
Agent(
agent_name="test-agent-1",
model_name="gpt-4o-mini",
llm_args=llm_args,
)
print("✓ Agent with only llm_args created successfully")
# Test 2: Only tools_list_dictionary
print("Testing Agent with only tools_list_dictionary...")
Agent(
agent_name="test-agent-2",
model_name="gpt-4o-mini",
tools_list_dictionary=tools_list,
)
print(
"✓ Agent with only tools_list_dictionary created successfully"
)
# Test 3: Combined llm_args and tools_list_dictionary
print(
"Testing Agent with combined llm_args and tools_list_dictionary..."
)
agent3 = Agent(
agent_name="test-agent-3",
model_name="gpt-4o-mini",
llm_args=llm_args,
tools_list_dictionary=tools_list,
)
print(
"✓ Agent with combined llm_args and tools_list_dictionary created successfully"
)
# Test 4: Verify that the LLM instance has the correct configuration
print("Verifying LLM configuration...")
# Check that agent3 has both llm_args and tools configured
assert agent3.llm_args == llm_args, "llm_args not preserved"
assert (
agent3.tools_list_dictionary == tools_list
), "tools_list_dictionary not preserved"
# Check that the LLM instance was created
assert agent3.llm is not None, "LLM instance not created"
print("✓ LLM configuration verified successfully")
# Test 5: Test that the LLM can be called (without actually making API calls)
print("Testing LLM call preparation...")
try:
# This should not fail due to configuration issues
# We're not actually calling the API, just testing the setup
print("✓ LLM call preparation successful")
except Exception as e:
print(f"✗ LLM call preparation failed: {e}")
return False
print(
"\n🎉 All tests passed! The LiteLLM initialization fix is working correctly."
)
return True
except Exception as e:
print(f"✗ Test failed: {e}")
import traceback
traceback.print_exc()
return False
def test_azure_openai_example():
"""Test the Azure OpenAI example with api_version parameter."""
print("\nTesting Azure OpenAI example with api_version...")
try:
# Create an agent with Azure OpenAI configuration
agent = Agent(
agent_name="azure-test-agent",
model_name="azure/gpt-4o",
llm_args={
"api_version": "2024-02-15-preview",
"base_url": "https://your-resource.openai.azure.com/",
"api_key": "your-api-key",
},
tools_list_dictionary=[
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather information",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state",
}
},
},
},
}
],
)
print(
"✓ Azure OpenAI agent with combined parameters created successfully"
)
# Verify configuration
assert agent.llm_args is not None, "llm_args not set"
assert (
"api_version" in agent.llm_args
), "api_version not in llm_args"
assert (
agent.tools_list_dictionary is not None
), "tools_list_dictionary not set"
assert (
len(agent.tools_list_dictionary) > 0
), "tools_list_dictionary is empty"
print("✓ Azure OpenAI configuration verified")
return True
except Exception as e:
print(f"✗ Azure OpenAI test failed: {e}")
import traceback
traceback.print_exc()
return False
if __name__ == "__main__":
print("🧪 Testing LiteLLM initialization fix...")
success1 = test_combined_llm_args()
success2 = test_azure_openai_example()
if success1 and success2:
print("\n✅ All tests passed! The fix is working correctly.")
sys.exit(0)
else:
print(
"\n❌ Some tests failed. Please check the implementation."
)
sys.exit(1)

@ -1,73 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify that the llm_handling method properly handles args and kwargs.
"""
import sys
import os
# Add the swarms directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "swarms"))
from swarms.structs.agent import Agent
def test_llm_handling_args_kwargs():
"""Test that llm_handling properly handles both args and kwargs."""
# Create an agent instance
agent = Agent(
agent_name="test-agent",
model_name="gpt-4o-mini",
temperature=0.7,
max_tokens=1000,
)
# Test 1: Call llm_handling with kwargs
print("Test 1: Testing kwargs handling...")
try:
# This should work and add the kwargs to additional_args
agent.llm_handling(top_p=0.9, frequency_penalty=0.1)
print("✓ kwargs handling works")
except Exception as e:
print(f"✗ kwargs handling failed: {e}")
# Test 2: Call llm_handling with args (dictionary)
print("\nTest 2: Testing args handling with dictionary...")
try:
# This should merge the dictionary into additional_args
additional_config = {
"presence_penalty": 0.2,
"logit_bias": {"123": 1},
}
agent.llm_handling(additional_config)
print("✓ args handling with dictionary works")
except Exception as e:
print(f"✗ args handling with dictionary failed: {e}")
# Test 3: Call llm_handling with both args and kwargs
print("\nTest 3: Testing both args and kwargs...")
try:
# This should handle both
additional_config = {"presence_penalty": 0.3}
agent.llm_handling(
additional_config, top_p=0.8, frequency_penalty=0.2
)
print("✓ combined args and kwargs handling works")
except Exception as e:
print(f"✗ combined args and kwargs handling failed: {e}")
# Test 4: Call llm_handling with non-dictionary args
print("\nTest 4: Testing non-dictionary args...")
try:
# This should store args under 'additional_args' key
agent.llm_handling(
"some_string", 123, ["list", "of", "items"]
)
print("✓ non-dictionary args handling works")
except Exception as e:
print(f"✗ non-dictionary args handling failed: {e}")
if __name__ == "__main__":
test_llm_handling_args_kwargs()

@ -1,230 +0,0 @@
from unittest.mock import Mock, patch
import pytest
from transformers import AutoModelForCausalLM, AutoTokenizer
from swarms import ToolAgent
from swarms.agents.exceptions import (
ToolExecutionError,
ToolNotFoundError,
ToolParameterError,
)
def test_tool_agent_init():
model = Mock(spec=AutoModelForCausalLM)
tokenizer = Mock(spec=AutoTokenizer)
json_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "number"},
"is_student": {"type": "boolean"},
"courses": {"type": "array", "items": {"type": "string"}},
},
}
name = "Test Agent"
description = "This is a test agent"
agent = ToolAgent(
name, description, model, tokenizer, json_schema
)
assert agent.name == name
assert agent.description == description
assert agent.model == model
assert agent.tokenizer == tokenizer
assert agent.json_schema == json_schema
@patch.object(ToolAgent, "run")
def test_tool_agent_run(mock_run):
model = Mock(spec=AutoModelForCausalLM)
tokenizer = Mock(spec=AutoTokenizer)
json_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "number"},
"is_student": {"type": "boolean"},
"courses": {"type": "array", "items": {"type": "string"}},
},
}
name = "Test Agent"
description = "This is a test agent"
task = (
"Generate a person's information based on the following"
" schema:"
)
agent = ToolAgent(
name, description, model, tokenizer, json_schema
)
agent.run(task)
mock_run.assert_called_once_with(task)
def test_tool_agent_init_with_kwargs():
model = Mock(spec=AutoModelForCausalLM)
tokenizer = Mock(spec=AutoTokenizer)
json_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "number"},
"is_student": {"type": "boolean"},
"courses": {"type": "array", "items": {"type": "string"}},
},
}
name = "Test Agent"
description = "This is a test agent"
kwargs = {
"debug": True,
"max_array_length": 20,
"max_number_tokens": 12,
"temperature": 0.5,
"max_string_token_length": 20,
}
agent = ToolAgent(
name, description, model, tokenizer, json_schema, **kwargs
)
assert agent.name == name
assert agent.description == description
assert agent.model == model
assert agent.tokenizer == tokenizer
assert agent.json_schema == json_schema
assert agent.debug == kwargs["debug"]
assert agent.max_array_length == kwargs["max_array_length"]
assert agent.max_number_tokens == kwargs["max_number_tokens"]
assert agent.temperature == kwargs["temperature"]
assert (
agent.max_string_token_length
== kwargs["max_string_token_length"]
)
def test_tool_agent_initialization():
"""Test tool agent initialization with valid parameters."""
agent = ToolAgent(
model_name="test-model", temperature=0.7, max_tokens=1000
)
assert agent.model_name == "test-model"
assert agent.temperature == 0.7
assert agent.max_tokens == 1000
assert agent.retry_attempts == 3
assert agent.retry_interval == 1.0
def test_tool_agent_initialization_error():
"""Test tool agent initialization with invalid model."""
with pytest.raises(ToolExecutionError) as exc_info:
ToolAgent(model_name="invalid-model")
assert "model_initialization" in str(exc_info.value)
def test_tool_validation():
"""Test tool parameter validation."""
tools_list = [
{
"name": "test_tool",
"parameters": [
{"name": "required_param", "required": True},
{"name": "optional_param", "required": False},
],
}
]
agent = ToolAgent(tools_list_dictionary=tools_list)
# Test missing required parameter
with pytest.raises(ToolParameterError) as exc_info:
agent._validate_tool("test_tool", {})
assert "Missing required parameters" in str(exc_info.value)
# Test valid parameters
agent._validate_tool("test_tool", {"required_param": "value"})
# Test non-existent tool
with pytest.raises(ToolNotFoundError) as exc_info:
agent._validate_tool("non_existent_tool", {})
assert "Tool 'non_existent_tool' not found" in str(exc_info.value)
def test_retry_mechanism():
"""Test retry mechanism for failed operations."""
mock_llm = Mock()
mock_llm.generate.side_effect = [
Exception("First attempt failed"),
Exception("Second attempt failed"),
Mock(outputs=[Mock(text="Success")]),
]
agent = ToolAgent(model_name="test-model")
agent.llm = mock_llm
# Test successful retry
result = agent.run("test task")
assert result == "Success"
assert mock_llm.generate.call_count == 3
# Test all retries failing
mock_llm.generate.side_effect = Exception("All attempts failed")
with pytest.raises(ToolExecutionError) as exc_info:
agent.run("test task")
assert "All attempts failed" in str(exc_info.value)
def test_batched_execution():
"""Test batched execution with error handling."""
mock_llm = Mock()
mock_llm.generate.side_effect = [
Mock(outputs=[Mock(text="Success 1")]),
Exception("Task 2 failed"),
Mock(outputs=[Mock(text="Success 3")]),
]
agent = ToolAgent(model_name="test-model")
agent.llm = mock_llm
tasks = ["Task 1", "Task 2", "Task 3"]
results = agent.batched_run(tasks)
assert len(results) == 3
assert results[0] == "Success 1"
assert "Error" in results[1]
assert results[2] == "Success 3"
def test_prompt_preparation():
"""Test prompt preparation with and without system prompt."""
# Test without system prompt
agent = ToolAgent()
prompt = agent._prepare_prompt("test task")
assert prompt == "User: test task\nAssistant:"
# Test with system prompt
agent = ToolAgent(system_prompt="You are a helpful assistant")
prompt = agent._prepare_prompt("test task")
assert (
prompt
== "You are a helpful assistant\n\nUser: test task\nAssistant:"
)
def test_tool_execution_error_handling():
"""Test error handling during tool execution."""
agent = ToolAgent(model_name="test-model")
agent.llm = None # Simulate uninitialized LLM
with pytest.raises(ToolExecutionError) as exc_info:
agent.run("test task")
assert "LLM not initialized" in str(exc_info.value)
# Test with invalid parameters
with pytest.raises(ToolExecutionError) as exc_info:
agent.run("test task", invalid_param="value")
assert "Error running task" in str(exc_info.value)

@ -1,171 +0,0 @@
from time import perf_counter_ns
import psutil
import os
from rich.panel import Panel
from rich.console import Console
from rich.table import Table
from statistics import mean, median, stdev, variance
from swarms.structs.agent import Agent
from swarms.prompts.finance_agent_sys_prompt import (
FINANCIAL_AGENT_SYS_PROMPT,
)
def get_memory_stats(memory_readings):
"""Calculate memory statistics"""
return {
"peak": max(memory_readings),
"min": min(memory_readings),
"mean": mean(memory_readings),
"median": median(memory_readings),
"stdev": (
stdev(memory_readings) if len(memory_readings) > 1 else 0
),
"variance": (
variance(memory_readings)
if len(memory_readings) > 1
else 0
),
}
def get_time_stats(times):
"""Calculate time statistics"""
return {
"total": sum(times),
"mean": mean(times),
"median": median(times),
"min": min(times),
"max": max(times),
"stdev": stdev(times) if len(times) > 1 else 0,
"variance": variance(times) if len(times) > 1 else 0,
}
def benchmark_multiple_agents(num_agents=100):
console = Console()
init_times = []
memory_readings = []
process = psutil.Process(os.getpid())
# Create benchmark tables
time_table = Table(title="Time Statistics")
time_table.add_column("Metric", style="cyan")
time_table.add_column("Value", style="green")
memory_table = Table(title="Memory Statistics")
memory_table.add_column("Metric", style="cyan")
memory_table.add_column("Value", style="green")
initial_memory = process.memory_info().rss / 1024
start_total_time = perf_counter_ns()
# Initialize agents and measure performance
for i in range(num_agents):
start_time = perf_counter_ns()
Agent(
agent_name=f"Financial-Analysis-Agent-{i}",
agent_description="Personal finance advisor agent",
system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
max_loops=2,
model_name="gpt-4o-mini",
dynamic_temperature_enabled=True,
interactive=False,
)
init_time = (perf_counter_ns() - start_time) / 1_000_000
init_times.append(init_time)
current_memory = process.memory_info().rss / 1024
memory_readings.append(current_memory - initial_memory)
if (i + 1) % 10 == 0:
console.print(
f"Created {i + 1} agents...", style="bold blue"
)
total_elapsed_time = (
perf_counter_ns() - start_total_time
) / 1_000_000
# Calculate statistics
time_stats = get_time_stats(init_times)
memory_stats = get_memory_stats(memory_readings)
# Add time measurements
time_table.add_row(
"Total Wall Time", f"{total_elapsed_time:.2f} ms"
)
time_table.add_row(
"Total Init Time", f"{time_stats['total']:.2f} ms"
)
time_table.add_row(
"Average Init Time", f"{time_stats['mean']:.2f} ms"
)
time_table.add_row(
"Median Init Time", f"{time_stats['median']:.2f} ms"
)
time_table.add_row("Fastest Init", f"{time_stats['min']:.2f} ms")
time_table.add_row("Slowest Init", f"{time_stats['max']:.2f} ms")
time_table.add_row(
"Std Deviation", f"{time_stats['stdev']:.2f} ms"
)
time_table.add_row(
"Variance", f"{time_stats['variance']:.4f} ms²"
)
time_table.add_row(
"Throughput",
f"{(num_agents/total_elapsed_time) * 1000:.2f} agents/second",
)
time_table.add_row(
"Agents per Minute",
f"{(num_agents/total_elapsed_time) * 60000:.0f} agents/minute",
)
# Add memory measurements
memory_table.add_row(
"Peak Memory Usage", f"{memory_stats['peak']:.2f} KB"
)
memory_table.add_row(
"Minimum Memory Usage", f"{memory_stats['min']:.2f} KB"
)
memory_table.add_row(
"Average Memory Usage", f"{memory_stats['mean']:.2f} KB"
)
memory_table.add_row(
"Median Memory Usage", f"{memory_stats['median']:.2f} KB"
)
memory_table.add_row(
"Memory Std Deviation", f"{memory_stats['stdev']:.2f} KB"
)
memory_table.add_row(
"Memory Variance", f"{memory_stats['variance']:.2f} KB²"
)
memory_table.add_row(
"Avg Memory Per Agent",
f"{memory_stats['mean']/num_agents:.2f} KB",
)
# Create and display panels
time_panel = Panel(
time_table,
title="Time Benchmark Results",
border_style="blue",
padding=(1, 2),
)
memory_panel = Panel(
memory_table,
title="Memory Benchmark Results",
border_style="green",
padding=(1, 2),
)
console.print(time_panel)
console.print("\n")
console.print(memory_panel)
if __name__ == "__main__":
benchmark_multiple_agents(1000)

@ -1,284 +0,0 @@
import asyncio
import concurrent.futures
import json
import os
import psutil
import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional
from swarms.structs.agent import Agent
from loguru import logger
class AgentBenchmark:
def __init__(
self,
num_iterations: int = 5,
output_dir: str = "benchmark_results",
):
self.num_iterations = num_iterations
self.output_dir = Path(output_dir)
self.output_dir.mkdir(exist_ok=True)
# Use process pool for CPU-bound tasks
self.process_pool = concurrent.futures.ProcessPoolExecutor(
max_workers=min(os.cpu_count(), 4)
)
# Use thread pool for I/O-bound tasks
self.thread_pool = concurrent.futures.ThreadPoolExecutor(
max_workers=min(os.cpu_count() * 2, 8)
)
self.default_queries = [
"Conduct an analysis of the best real undervalued ETFs",
"What are the top performing tech stocks this quarter?",
"Analyze current market trends in renewable energy sector",
"Compare Bitcoin and Ethereum investment potential",
"Evaluate the risk factors in emerging markets",
]
self.agent = self._initialize_agent()
self.process = psutil.Process()
# Cache for storing repeated query results
self._query_cache = {}
def _initialize_agent(self) -> Agent:
return Agent(
agent_name="Financial-Analysis-Agent",
agent_description="Personal finance advisor agent",
# system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
max_loops=1,
model_name="gpt-4o-mini",
dynamic_temperature_enabled=True,
interactive=False,
)
def _get_system_metrics(self) -> Dict[str, float]:
# Optimized system metrics collection
return {
"cpu_percent": self.process.cpu_percent(),
"memory_mb": self.process.memory_info().rss / 1024 / 1024,
}
def _calculate_statistics(
self, values: List[float]
) -> Dict[str, float]:
if not values:
return {}
sorted_values = sorted(values)
n = len(sorted_values)
mean_val = sum(values) / n
stats = {
"mean": mean_val,
"median": sorted_values[n // 2],
"min": sorted_values[0],
"max": sorted_values[-1],
}
# Only calculate stdev if we have enough values
if n > 1:
stats["std_dev"] = (
sum((x - mean_val) ** 2 for x in values) / n
) ** 0.5
return {k: round(v, 3) for k, v in stats.items()}
async def process_iteration(
self, query: str, iteration: int
) -> Dict[str, Any]:
"""Process a single iteration of a query"""
try:
# Check cache for repeated queries
cache_key = f"{query}_{iteration}"
if cache_key in self._query_cache:
return self._query_cache[cache_key]
iteration_start = datetime.datetime.now()
pre_metrics = self._get_system_metrics()
# Run the agent
try:
self.agent.run(query)
success = True
except Exception as e:
str(e)
success = False
execution_time = (
datetime.datetime.now() - iteration_start
).total_seconds()
post_metrics = self._get_system_metrics()
result = {
"execution_time": execution_time,
"success": success,
"pre_metrics": pre_metrics,
"post_metrics": post_metrics,
"iteration_data": {
"iteration": iteration + 1,
"execution_time": round(execution_time, 3),
"success": success,
"system_metrics": {
"pre": pre_metrics,
"post": post_metrics,
},
},
}
# Cache the result
self._query_cache[cache_key] = result
return result
except Exception as e:
logger.error(f"Error in iteration {iteration}: {e}")
raise
async def run_benchmark(
self, queries: Optional[List[str]] = None
) -> Dict[str, Any]:
"""Run the benchmark asynchronously"""
queries = queries or self.default_queries
benchmark_data = {
"metadata": {
"timestamp": datetime.datetime.now().isoformat(),
"num_iterations": self.num_iterations,
"agent_config": {
"model_name": self.agent.model_name,
"max_loops": self.agent.max_loops,
},
},
"results": {},
}
async def process_query(query: str):
query_results = {
"execution_times": [],
"system_metrics": [],
"iterations": [],
}
# Process iterations concurrently
tasks = [
self.process_iteration(query, i)
for i in range(self.num_iterations)
]
iteration_results = await asyncio.gather(*tasks)
for result in iteration_results:
query_results["execution_times"].append(
result["execution_time"]
)
query_results["system_metrics"].append(
result["post_metrics"]
)
query_results["iterations"].append(
result["iteration_data"]
)
# Calculate statistics
query_results["statistics"] = {
"execution_time": self._calculate_statistics(
query_results["execution_times"]
),
"memory_usage": self._calculate_statistics(
[
m["memory_mb"]
for m in query_results["system_metrics"]
]
),
"cpu_usage": self._calculate_statistics(
[
m["cpu_percent"]
for m in query_results["system_metrics"]
]
),
}
return query, query_results
# Execute all queries concurrently
query_tasks = [process_query(query) for query in queries]
query_results = await asyncio.gather(*query_tasks)
for query, results in query_results:
benchmark_data["results"][query] = results
return benchmark_data
def save_results(self, benchmark_data: Dict[str, Any]) -> str:
"""Save benchmark results efficiently"""
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filename = (
self.output_dir / f"benchmark_results_{timestamp}.json"
)
# Write results in a single operation
with open(filename, "w") as f:
json.dump(benchmark_data, f, indent=2)
logger.info(f"Benchmark results saved to: {filename}")
return str(filename)
def print_summary(self, results: Dict[str, Any]):
"""Print a summary of the benchmark results"""
print("\n=== Benchmark Summary ===")
for query, data in results["results"].items():
print(f"\nQuery: {query[:50]}...")
stats = data["statistics"]["execution_time"]
print(f"Average time: {stats['mean']:.2f}s")
print(
f"Memory usage (avg): {data['statistics']['memory_usage']['mean']:.1f}MB"
)
print(
f"CPU usage (avg): {data['statistics']['cpu_usage']['mean']:.1f}%"
)
async def run_with_timeout(
self, timeout: int = 300
) -> Dict[str, Any]:
"""Run benchmark with timeout"""
try:
return await asyncio.wait_for(
self.run_benchmark(), timeout
)
except asyncio.TimeoutError:
logger.error(
f"Benchmark timed out after {timeout} seconds"
)
raise
def cleanup(self):
"""Cleanup resources"""
self.process_pool.shutdown()
self.thread_pool.shutdown()
self._query_cache.clear()
async def main():
benchmark = None
try:
# Create and run benchmark
benchmark = AgentBenchmark(num_iterations=1)
# Run benchmark with timeout
results = await benchmark.run_with_timeout(timeout=300)
# Save results
benchmark.save_results(results)
# Print summary
benchmark.print_summary(results)
except Exception as e:
logger.error(f"Benchmark failed: {e}")
finally:
# Clean up resources, but only if the benchmark was actually created
if benchmark is not None:
benchmark.cleanup()
if __name__ == "__main__":
# Run the async main function
asyncio.run(main())
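For quick inspection of a saved run, a small loader along the following lines can re-print the per-query execution-time statistics. It is only a sketch: it assumes the JSON layout produced by run_benchmark and save_results above, and the file name in the usage comment is a hypothetical instance of the benchmark_results_<timestamp>.json pattern.

import json
from pathlib import Path


def summarize_saved_run(path: str) -> None:
    """Re-print per-query execution-time statistics from a saved benchmark JSON file."""
    data = json.loads(Path(path).read_text())
    print(f"Run from {data['metadata']['timestamp']}")
    for query, results in data["results"].items():
        stats = results["statistics"]["execution_time"]
        print(f"- {query[:50]}...: mean={stats['mean']}s, max={stats['max']}s")


# Hypothetical usage, following the benchmark_results_<timestamp>.json naming:
# summarize_saved_run("benchmark_results_20250101_120000.json")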

@ -1,318 +0,0 @@
import json
import os
import platform
import sys
import traceback
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional
import psutil
import requests
from loguru import logger
from swarm_models import OpenAIChat
from swarms.structs.agent import Agent
@dataclass
class SwarmSystemInfo:
"""System information for Swarms issue reports."""
os_name: str
os_version: str
python_version: str
cpu_usage: float
memory_usage: float
disk_usage: float
swarms_version: str # Added Swarms version tracking
cuda_available: bool # Added CUDA availability check
gpu_info: Optional[str] # Added GPU information
class SwarmsIssueReporter:
"""
Production-grade GitHub issue reporter specifically designed for the Swarms library.
Automatically creates detailed issues for the https://github.com/kyegomez/swarms repository.
Features:
- Swarms-specific error categorization
- Automatic version and dependency tracking
- CUDA and GPU information collection
- Integration with Swarms logging system
- Detailed environment information
"""
REPO_OWNER = "kyegomez"
REPO_NAME = "swarms"
ISSUE_CATEGORIES = {
"agent": ["agent", "automation"],
"memory": ["memory", "storage"],
"tool": ["tools", "integration"],
"llm": ["llm", "model"],
"performance": ["performance", "optimization"],
"compatibility": ["compatibility", "environment"],
}
def __init__(
self,
github_token: str,
rate_limit: int = 10,
rate_period: int = 3600,
log_file: str = "swarms_issues.log",
enable_duplicate_check: bool = True,
):
"""
Initialize the Swarms Issue Reporter.
Args:
github_token (str): GitHub personal access token
rate_limit (int): Maximum number of issues to create per rate_period
rate_period (int): Time period for rate limiting in seconds
log_file (str): Path to log file
enable_duplicate_check (bool): Whether to check for duplicate issues
"""
self.github_token = github_token
self.rate_limit = rate_limit
self.rate_period = rate_period
self.enable_duplicate_check = enable_duplicate_check
# Fall back to the GITHUB_API_KEY environment variable only when no token was passed
self.github_token = github_token or os.getenv("GITHUB_API_KEY")
# Initialize logging to the requested log file
log_path = os.path.join(os.getcwd(), "logs", log_file)
os.makedirs(os.path.dirname(log_path), exist_ok=True)
logger.add(log_path)
# Issue tracking
self.issues_created = []
self.last_issue_time = datetime.now()
def _get_swarms_version(self) -> str:
"""Get the installed version of Swarms."""
try:
import swarms
return swarms.__version__
except Exception:
return "Unknown"
def _get_system_info(self) -> SwarmSystemInfo:
"""Collect system and Swarms-specific information."""
# Detect CUDA/GPU availability via torch if it is installed
try:
import torch
cuda_available = torch.cuda.is_available()
gpu_info = torch.cuda.get_device_name(0) if cuda_available else None
except ImportError:
cuda_available = False
gpu_info = None
return SwarmSystemInfo(
os_name=platform.system(),
os_version=platform.version(),
python_version=sys.version,
cpu_usage=psutil.cpu_percent(),
memory_usage=psutil.virtual_memory().percent,
disk_usage=psutil.disk_usage("/").percent,
swarms_version=self._get_swarms_version(),
cuda_available=cuda_available,
gpu_info=gpu_info,
)
def _categorize_error(
self, error: Exception, context: Dict
) -> List[str]:
"""Categorize the error and return appropriate labels."""
error_str = str(error).lower()
labels = ["bug", "automated"]
# Check error message and context for category keywords
for (
category,
category_labels,
) in self.ISSUE_CATEGORIES.items():
if any(
keyword in error_str for keyword in category_labels
):
labels.extend(category_labels)
break
# Add severity label based on error type
if issubclass(type(error), (SystemError, MemoryError)):
labels.append("severity:critical")
elif issubclass(type(error), (ValueError, TypeError)):
labels.append("severity:medium")
else:
labels.append("severity:low")
return list(set(labels)) # Remove duplicates
def _format_swarms_issue_body(
self,
error: Exception,
system_info: SwarmSystemInfo,
context: Dict,
) -> str:
"""Format the issue body with Swarms-specific information."""
return f"""
## Swarms Error Report
- **Error Type**: {type(error).__name__}
- **Error Message**: {str(error)}
- **Swarms Version**: {system_info.swarms_version}
## Environment Information
- **OS**: {system_info.os_name} {system_info.os_version}
- **Python Version**: {system_info.python_version}
- **CUDA Available**: {system_info.cuda_available}
- **GPU**: {system_info.gpu_info or "N/A"}
- **CPU Usage**: {system_info.cpu_usage}%
- **Memory Usage**: {system_info.memory_usage}%
- **Disk Usage**: {system_info.disk_usage}%
## Stack Trace
{traceback.format_exc()}
## Context
{json.dumps(context, indent=2)}
## Dependencies
{self._get_dependencies_info()}
## Time of Occurrence
{datetime.now().isoformat()}
---
*This issue was automatically generated by SwarmsIssueReporter*
"""
def _get_dependencies_info(self) -> str:
"""Get information about installed dependencies."""
try:
import pkg_resources
deps = []
for dist in pkg_resources.working_set:
deps.append(f"- {dist.key} {dist.version}")
return "\n".join(deps)
except Exception:
return "Unable to fetch dependency information"
# Rate-limit helper consulted before creating a new issue
def _check_rate_limit(self) -> bool:
"""Check if we're within rate limits."""
now = datetime.now()
time_diff = (now - self.last_issue_time).total_seconds()
if (
len(self.issues_created) >= self.rate_limit
and time_diff < self.rate_period
):
logger.warning("Rate limit exceeded for issue creation")
return False
# Clean up old issues from tracking
self.issues_created = [
time
for time in self.issues_created
if (now - time).total_seconds() < self.rate_period
]
return True
def report_swarms_issue(
self,
error: Exception,
agent: Optional[Agent] = None,
context: Optional[Dict[str, Any]] = None,
priority: str = "normal",
) -> Optional[int]:
"""
Report a Swarms-specific issue to GitHub.
Args:
error (Exception): The exception to report
agent (Optional[Agent]): The Swarms agent instance that encountered the error
context (Dict[str, Any]): Additional context about the error
priority (str): Issue priority ("low", "normal", "high", "critical")
Returns:
Optional[int]: Issue number if created successfully
"""
try:
if not self._check_rate_limit():
logger.warning(
"Skipping issue creation due to rate limit"
)
return None
# Collect system information
system_info = self._get_system_info()
# Prepare context with agent information if available
full_context = context or {}
if agent:
full_context.update(
{
"agent_name": agent.agent_name,
"agent_description": agent.agent_description,
"max_loops": agent.max_loops,
"context_length": agent.context_length,
}
)
# Create issue title
title = f"[{type(error).__name__}] {str(error)[:100]}"
if agent:
title = f"[Agent: {agent.agent_name}] {title}"
# Get appropriate labels
labels = self._categorize_error(error, full_context)
labels.append(f"priority:{priority}")
# Create the issue
url = f"https://api.github.com/repos/{self.REPO_OWNER}/{self.REPO_NAME}/issues"
data = {
"title": title,
"body": self._format_swarms_issue_body(
error, system_info, full_context
),
"labels": labels,
}
response = requests.post(
url,
headers={
"Authorization": f"token {self.github_token}"
},
json=data,
)
response.raise_for_status()
issue_number = response.json()["number"]
# Record the creation time so _check_rate_limit can enforce the limit
self.issues_created.append(datetime.now())
self.last_issue_time = datetime.now()
logger.info(
f"Successfully created Swarms issue #{issue_number}"
)
return issue_number
except Exception as e:
logger.error(f"Error creating Swarms issue: {str(e)}")
return None
# Setup the reporter with your GitHub token
reporter = SwarmsIssueReporter(
github_token=os.getenv("GITHUB_API_KEY")
)
# Force an error to test the reporter
try:
# Create an agent and force an error by running it with invalid input
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(agent_name="Test-Agent", llm=model, max_loops=1)
result = agent.run(None)  # None is not a valid task, so this should raise
raise ValueError("test")
except Exception as e:
# Report the issue
issue_number = reporter.report_swarms_issue(
error=e,
agent=agent,
context={"task": "test_run"},
priority="high",
)
print(f"Created issue number: {issue_number}")

@ -1,180 +0,0 @@
import requests
import datetime
from typing import List, Dict, Tuple
from loguru import logger
from swarms import Agent
from swarm_models import OpenAIChat
# GitHub API Configurations
GITHUB_REPO = "kyegomez/swarms" # Swarms GitHub repository
GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_REPO}/commits"
# Step 1: Fetch the latest commits from GitHub
def fetch_latest_commits(
repo_url: str, limit: int = 5
) -> List[Dict[str, str]]:
"""
Fetch the latest commits from a public GitHub repository.
"""
logger.info(
f"Fetching the latest {limit} commits from {repo_url}"
)
try:
params = {"per_page": limit}
response = requests.get(repo_url, params=params)
response.raise_for_status()
commits = response.json()
commit_data = []
for commit in commits:
commit_data.append(
{
"sha": commit["sha"][:7], # Short commit hash
"author": commit["commit"]["author"]["name"],
"message": commit["commit"]["message"],
"date": commit["commit"]["author"]["date"],
}
)
logger.success("Successfully fetched commit data")
return commit_data
except Exception as e:
logger.error(f"Error fetching commits: {e}")
raise
# Step 2: Format commits and fetch current time
def format_commits_with_time(
commits: List[Dict[str, str]],
) -> Tuple[str, str]:
"""
Format commit data into a readable string and return current time.
"""
current_time = datetime.datetime.now().strftime(
"%Y-%m-%d %H:%M:%S"
)
logger.info(f"Formatting commits at {current_time}")
commit_summary = "\n".join(
[
f"- `{commit['sha']}` by {commit['author']} on {commit['date']}: {commit['message']}"
for commit in commits
]
)
logger.success("Commits formatted successfully")
return current_time, commit_summary
# Step 3: Build a dynamic system prompt
def build_custom_system_prompt(
current_time: str, commit_summary: str
) -> str:
"""
Build a dynamic system prompt with the current time and commit summary.
"""
logger.info("Building the custom system prompt for the agent")
prompt = f"""
You are a software analyst tasked with summarizing the latest commits from the Swarms GitHub repository.
The current time is **{current_time}**.
Here are the latest commits:
{commit_summary}
**Your task**:
1. Summarize the changes into a clear and concise table in **markdown format**.
2. Highlight the key improvements and fixes.
3. End your output with the token `<DONE>`.
Make sure the table includes the following columns: Commit SHA, Author, Date, and Commit Message.
"""
logger.success("System prompt created successfully")
return prompt
# Step 4: Initialize the Agent
def initialize_agent() -> Agent:
"""
Initialize the Swarms agent with OpenAI model.
"""
logger.info("Initializing the agent with GPT-4o")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Commit-Summarization-Agent",
agent_description="Fetch and summarize GitHub commits for Swarms repository.",
system_prompt="", # Will set dynamically
max_loops=1,
llm=model,
dynamic_temperature_enabled=True,
user_name="Kye",
retry_attempts=3,
context_length=8192,
return_step_meta=False,
output_type="str",
auto_generate_prompt=False,
max_tokens=4000,
stopping_token="<DONE>",
interactive=False,
)
logger.success("Agent initialized successfully")
return agent
# Step 5: Run the Agent with Data
def summarize_commits_with_agent(agent: Agent, prompt: str) -> str:
"""
Pass the system prompt to the agent and fetch the result.
"""
logger.info("Sending data to the agent for summarization")
try:
result = agent.run(
f"{prompt}",
all_cores=True,
)
logger.success("Agent completed the summarization task")
return result
except Exception as e:
logger.error(f"Agent encountered an error: {e}")
raise
# Main Execution
if __name__ == "__main__":
try:
logger.info("Starting commit summarization process")
# Fetch latest commits
latest_commits = fetch_latest_commits(GITHUB_API_URL, limit=5)
# Format commits and get current time
current_time, commit_summary = format_commits_with_time(
latest_commits
)
# Build the custom system prompt
custom_system_prompt = build_custom_system_prompt(
current_time, commit_summary
)
# Initialize agent
agent = initialize_agent()
# Set the dynamic system prompt
agent.system_prompt = custom_system_prompt
# Run the agent and summarize commits
result = summarize_commits_with_agent(
agent, custom_system_prompt
)
# Print the result
print("### Commit Summary in Markdown:")
print(result)
except Exception as e:
logger.critical(f"Process failed: {e}")
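fetch_latest_commits above queries the commits endpoint anonymously, which GitHub rate-limits (roughly 60 unauthenticated requests per hour per IP). A variant that sends a token when one is configured is sketched below; the GITHUB_TOKEN environment variable and the function name are assumptions, not part of the original script.

import os
from typing import Dict, List

import requests


def fetch_latest_commits_authenticated(
    repo_url: str, limit: int = 5
) -> List[Dict[str, str]]:
    """Like fetch_latest_commits, but authenticates when a token is configured."""
    headers = {}
    token = os.getenv("GITHUB_TOKEN")  # assumed environment variable name
    if token:
        headers["Authorization"] = f"Bearer {token}"
    response = requests.get(
        repo_url, params={"per_page": limit}, headers=headers, timeout=10
    )
    response.raise_for_status()
    return [
        {
            "sha": commit["sha"][:7],
            "author": commit["commit"]["author"]["name"],
            "message": commit["commit"]["message"],
            "date": commit["commit"]["author"]["date"],
        }
        for commit in response.json()
    ]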

@ -1,46 +0,0 @@
import os
import uuid
from swarms import Agent
from swarm_models import OpenAIChat
from swarms.prompts.finance_agent_sys_prompt import (
FINANCIAL_AGENT_SYS_PROMPT,
)
import time
start_time = time.time()
# Get the OpenAI API key from the environment variable
api_key = os.getenv("OPENAI_API_KEY")
# Create an instance of the OpenAIChat class
model = OpenAIChat(
api_key=api_key, model_name="gpt-4o-mini", temperature=0.1
)
agent = Agent(
agent_name=f"{uuid.uuid4().hex}",
system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
llm=model,
max_loops=1,
autosave=True,
dashboard=False,
verbose=True,
dynamic_temperature_enabled=True,
saved_state_path=f"{uuid.uuid4().hex}",
user_name="swarms_corp",
retry_attempts=1,
context_length=3000,
return_step_meta=False,
)
out = agent.run(
"How can I establish a ROTH IRA to buy stocks and get a tax break? What are the criteria"
)
print(out)
end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")
# Execution time: 9.922541856765747 seconds for the whole script

[Five binary image files were deleted in this commit (approx. 175 KiB, 178 KiB, 130 KiB, 75 KiB, and 66 KiB); previews not shown.]

@ -1,91 +0,0 @@
agent_count,test_name,model_name,latency_ms,throughput_rps,memory_usage_mb,cpu_usage_percent,success_rate,error_count,total_requests,concurrent_requests,timestamp,cost_usd,tokens_used,response_quality_score,additional_metrics,agent_creation_time,tool_registration_time,execution_time,total_latency,chaining_steps,chaining_success,error_scenarios_tested,recovery_rate,resource_cycles,avg_memory_delta,memory_leak_detected
1,scaling_test,gpt-4o-mini,1131.7063331604004,4.131429224630576,1.25,0.0,1.0,0,20,5,1759345643.9453266,0.0015359999999999996,10240,0.8548663728748707,"{'min_latency_ms': 562.7951622009277, 'max_latency_ms': 1780.4391384124756, 'p95_latency_ms': np.float64(1744.0685987472534), 'p99_latency_ms': np.float64(1773.1650304794312), 'total_time_s': 4.84093976020813, 'initial_memory_mb': 291.5546875, 'final_memory_mb': 292.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.679999999999998e-05, 'quality_std': 0.0675424923987846, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,gpt-4o-mini,1175.6950378417969,3.7575854004826277,0.0,0.0,1.0,0,20,5,1759345654.225195,0.0015359999999999996,10240,0.8563524483655013,"{'min_latency_ms': 535.4223251342773, 'max_latency_ms': 1985.3930473327637, 'p95_latency_ms': np.float64(1975.6355285644531), 'p99_latency_ms': np.float64(1983.4415435791016), 'total_time_s': 5.322566986083984, 'initial_memory_mb': 293.1796875, 'final_memory_mb': 293.1796875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.679999999999998e-05, 'quality_std': 0.05770982402152013, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,gpt-4o-mini,996.9684720039368,4.496099509029146,0.0,0.0,1.0,0,20,5,1759345662.8977199,0.0015359999999999996,10240,0.8844883644941982,"{'min_latency_ms': 45.22204399108887, 'max_latency_ms': 1962.2983932495117, 'p95_latency_ms': np.float64(1647.7753758430483), 'p99_latency_ms': np.float64(1899.3937897682185), 'total_time_s': 4.448300123214722, 'initial_memory_mb': 293.5546875, 'final_memory_mb': 293.5546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.679999999999998e-05, 'quality_std': 0.043434832388308614, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,gpt-4o-mini,1112.8681421279907,3.587833950074127,0.0,0.0,1.0,0,20,5,1759345673.162652,0.0015359999999999996,10240,0.8563855623109009,"{'min_latency_ms': 564.1369819641113, 'max_latency_ms': 1951.472282409668, 'p95_latency_ms': np.float64(1897.4883794784546), 'p99_latency_ms': np.float64(1940.6755018234253), 'total_time_s': 5.57439398765564, 'initial_memory_mb': 293.8046875, 'final_memory_mb': 293.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.679999999999998e-05, 'quality_std': 0.05691925404970228, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,gpt-4o,1298.2240080833435,3.3670995599405846,0.125,0.0,1.0,0,20,5,1759345683.2065425,0.0512,10240,0.9279627852934385,"{'min_latency_ms': 693.6078071594238, 'max_latency_ms': 1764.8026943206787, 'p95_latency_ms': np.float64(1681.7602753639221), 'p99_latency_ms': np.float64(1748.1942105293274), 'total_time_s': 5.939830303192139, 'initial_memory_mb': 293.8046875, 'final_memory_mb': 293.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00256, 'quality_std': 0.050879141399088765, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,gpt-4o,1264.4854545593262,3.5293826102318846,0.0,0.0,1.0,0,20,5,1759345692.6439528,0.0512,10240,0.9737471278894755,"{'min_latency_ms': 175.65083503723145, 'max_latency_ms': 1990.2207851409912, 'p95_latency_ms': np.float64(1910.3824019432068), 'p99_latency_ms': np.float64(1974.2531085014343), 'total_time_s': 5.66671347618103, 'initial_memory_mb': 293.9296875, 'final_memory_mb': 293.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00256, 'quality_std': 0.038542680129780495, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,gpt-4o,1212.0607376098633,3.799000004302323,0.125,0.0,1.0,0,20,5,1759345701.8719423,0.0512,10240,0.9366077507029601,"{'min_latency_ms': 542.8001880645752, 'max_latency_ms': 1973.801851272583, 'p95_latency_ms': np.float64(1969.2555904388428), 'p99_latency_ms': np.float64(1972.892599105835), 'total_time_s': 5.264543294906616, 'initial_memory_mb': 293.9296875, 'final_memory_mb': 294.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00256, 'quality_std': 0.044670864578792276, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,gpt-4o,1367.1631932258606,3.1229790107314654,0.0,0.0,1.0,0,20,5,1759345711.9738443,0.0512,10240,0.9328922198254587,"{'min_latency_ms': 715.888261795044, 'max_latency_ms': 1905.6315422058105, 'p95_latency_ms': np.float64(1890.480661392212), 'p99_latency_ms': np.float64(1902.6013660430908), 'total_time_s': 6.404141664505005, 'initial_memory_mb': 294.0546875, 'final_memory_mb': 294.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00256, 'quality_std': 0.05146728864962903, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,gpt-4-turbo,1429.1370868682861,3.3141614744089267,0.125,0.0,1.0,0,20,5,1759345722.7650242,0.1024,10240,0.960928099222926,"{'min_latency_ms': 637.6686096191406, 'max_latency_ms': 1994.9300289154053, 'p95_latency_ms': np.float64(1973.6997246742249), 'p99_latency_ms': np.float64(1990.6839680671692), 'total_time_s': 6.0347089767456055, 'initial_memory_mb': 294.0546875, 'final_memory_mb': 294.1796875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00512, 'quality_std': 0.0429193742204114, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,gpt-4-turbo,1167.8012132644653,3.933946564951724,0.0,0.0,1.0,0,20,5,1759345731.809648,0.1024,10240,0.9575695597206497,"{'min_latency_ms': 521.2328433990479, 'max_latency_ms': 1973.503828048706, 'p95_latency_ms': np.float64(1931.3542008399963), 'p99_latency_ms': np.float64(1965.073902606964), 'total_time_s': 5.083953142166138, 'initial_memory_mb': 294.1796875, 'final_memory_mb': 294.1796875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00512, 'quality_std': 0.04742414087184447, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,gpt-4-turbo,1435.1954460144043,3.0793869953124613,0.0,0.0,1.0,0,20,5,1759345741.9117725,0.1024,10240,0.9564233524947511,"{'min_latency_ms': 711.4903926849365, 'max_latency_ms': 2034.2109203338623, 'p95_latency_ms': np.float64(1998.979663848877), 'p99_latency_ms': np.float64(2027.1646690368652), 'total_time_s': 6.4947991371154785, 'initial_memory_mb': 294.3046875, 'final_memory_mb': 294.3046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00512, 'quality_std': 0.03428874308764032, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,gpt-4-turbo,1092.1013355255127,4.057819053252887,0.0,0.0,1.0,0,20,5,1759345749.8833907,0.1024,10240,0.9521218582720758,"{'min_latency_ms': 554.4416904449463, 'max_latency_ms': 1968.658447265625, 'p95_latency_ms': np.float64(1637.098050117493), 'p99_latency_ms': np.float64(1902.346367835998), 'total_time_s': 4.92875599861145, 'initial_memory_mb': 294.3046875, 'final_memory_mb': 294.3046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00512, 'quality_std': 0.043763298033728824, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,claude-3-5-sonnet,1046.9236850738525,4.047496446876068,0.0,0.0,1.0,0,20,5,1759345757.9539518,0.03071999999999999,10240,0.9511838758969231,"{'min_latency_ms': 184.94415283203125, 'max_latency_ms': 1966.0136699676514, 'p95_latency_ms': np.float64(1677.8094530105593), 'p99_latency_ms': np.float64(1908.3728265762325), 'total_time_s': 4.941326141357422, 'initial_memory_mb': 294.3046875, 'final_memory_mb': 294.3046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0015359999999999996, 'quality_std': 0.03727295215254124, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,claude-3-5-sonnet,1381.3772201538086,3.283979343278356,0.0,0.0,1.0,0,20,5,1759345768.7153368,0.03071999999999999,10240,0.957817098536435,"{'min_latency_ms': 543.0643558502197, 'max_latency_ms': 1937.4654293060303, 'p95_latency_ms': np.float64(1931.4598441123962), 'p99_latency_ms': np.float64(1936.2643122673035), 'total_time_s': 6.090172290802002, 'initial_memory_mb': 294.3046875, 'final_memory_mb': 294.3046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0015359999999999996, 'quality_std': 0.044335695599357156, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,claude-3-5-sonnet,1314.3961310386658,3.5243521468336656,0.0,0.0,1.0,0,20,5,1759345778.6269403,0.03071999999999999,10240,0.9749641888502683,"{'min_latency_ms': 535.1722240447998, 'max_latency_ms': 1983.6831092834473, 'p95_latency_ms': np.float64(1918.512487411499), 'p99_latency_ms': np.float64(1970.6489849090576), 'total_time_s': 5.674801826477051, 'initial_memory_mb': 294.3046875, 'final_memory_mb': 294.3046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0015359999999999996, 'quality_std': 0.03856740540886548, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,claude-3-5-sonnet,1120.720875263214,3.7028070875807546,0.0,0.0,1.0,0,20,5,1759345788.3161702,0.03071999999999999,10240,0.9344569749738585,"{'min_latency_ms': 207.9324722290039, 'max_latency_ms': 2018.561601638794, 'p95_latency_ms': np.float64(1963.4979844093323), 'p99_latency_ms': np.float64(2007.5488781929016), 'total_time_s': 5.401307582855225, 'initial_memory_mb': 294.3046875, 'final_memory_mb': 294.3046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0015359999999999996, 'quality_std': 0.04750434388073592, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,claude-3-haiku,1268.5401320457458,3.539921687652236,0.0,0.0,1.0,0,20,5,1759345797.6495905,0.0256,10240,0.8406194607723803,"{'min_latency_ms': 534.9514484405518, 'max_latency_ms': 1956.9103717803955, 'p95_latency_ms': np.float64(1938.3319020271301), 'p99_latency_ms': np.float64(1953.1946778297424), 'total_time_s': 5.6498425006866455, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00128, 'quality_std': 0.053962632063170944, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,claude-3-haiku,1377.644693851471,3.189212271479164,0.0,0.0,1.0,0,20,5,1759345808.2179801,0.0256,10240,0.8370154862115219,"{'min_latency_ms': 661.4456176757812, 'max_latency_ms': 2013.9634609222412, 'p95_latency_ms': np.float64(1985.2455973625183), 'p99_latency_ms': np.float64(2008.2198882102966), 'total_time_s': 6.271141052246094, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00128, 'quality_std': 0.057589803133820325, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,claude-3-haiku,1161.9974493980408,3.6778795132801156,0.0,0.0,1.0,0,20,5,1759345817.2541294,0.0256,10240,0.8421329247896683,"{'min_latency_ms': 549.6580600738525, 'max_latency_ms': 1785.23588180542, 'p95_latency_ms': np.float64(1730.9520959854126), 'p99_latency_ms': np.float64(1774.3791246414185), 'total_time_s': 5.437916040420532, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00128, 'quality_std': 0.05774508247670216, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,claude-3-haiku,1365.4750227928162,2.998821435629251,0.0,0.0,1.0,0,20,5,1759345827.8750126,0.0256,10240,0.8483772503724578,"{'min_latency_ms': 767.146110534668, 'max_latency_ms': 1936.8767738342285, 'p95_latency_ms': np.float64(1919.3583130836487), 'p99_latency_ms': np.float64(1933.3730816841125), 'total_time_s': 6.669286727905273, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00128, 'quality_std': 0.05705131022796498, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,claude-3-sonnet,1360.187566280365,3.089520735450049,0.0,0.0,1.0,0,20,5,1759345837.7737727,0.15360000000000001,10240,0.8835217044830507,"{'min_latency_ms': 550.3547191619873, 'max_latency_ms': 1977.1480560302734, 'p95_latency_ms': np.float64(1924.659264087677), 'p99_latency_ms': np.float64(1966.6502976417542), 'total_time_s': 6.473495960235596, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000001, 'quality_std': 0.058452629496046606, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,claude-3-sonnet,1256.138801574707,3.4732685564079335,0.0,0.0,1.0,0,20,5,1759345848.5701082,0.15360000000000001,10240,0.8863139635356961,"{'min_latency_ms': 641.2796974182129, 'max_latency_ms': 1980.7326793670654, 'p95_latency_ms': np.float64(1846.4025855064392), 'p99_latency_ms': np.float64(1953.86666059494), 'total_time_s': 5.758264780044556, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000001, 'quality_std': 0.05783521510861833, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,claude-3-sonnet,1306.07008934021,3.5020347317551495,0.0,0.0,1.0,0,20,5,1759345858.6472163,0.15360000000000001,10240,0.9094961422561505,"{'min_latency_ms': 591.8083190917969, 'max_latency_ms': 1971.1270332336426, 'p95_latency_ms': np.float64(1944.3620324134827), 'p99_latency_ms': np.float64(1965.7740330696106), 'total_time_s': 5.710965633392334, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000001, 'quality_std': 0.042442911768923584, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,claude-3-sonnet,1307.1481943130493,3.262938882676132,0.0,0.0,1.0,0,20,5,1759345869.905544,0.15360000000000001,10240,0.8938240662052681,"{'min_latency_ms': 646.7251777648926, 'max_latency_ms': 1990.9627437591553, 'p95_latency_ms': np.float64(1935.0676536560059), 'p99_latency_ms': np.float64(1979.7837257385254), 'total_time_s': 6.129443645477295, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000001, 'quality_std': 0.04247877605865338, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,gemini-1.5-pro,1401.3476371765137,2.943218490521141,0.0,0.0,1.0,0,20,5,1759345881.238218,0.0128,10240,0.9409363720199192,"{'min_latency_ms': 520.9827423095703, 'max_latency_ms': 1970.2589511871338, 'p95_latency_ms': np.float64(1958.1118822097778), 'p99_latency_ms': np.float64(1967.8295373916626), 'total_time_s': 6.7952821254730225, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00064, 'quality_std': 0.05267230653872383, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,gemini-1.5-pro,1341.485834121704,3.3982951582179024,0.0,0.0,1.0,0,20,5,1759345889.5553467,0.0128,10240,0.9355344625586725,"{'min_latency_ms': 503.9515495300293, 'max_latency_ms': 1978.0657291412354, 'p95_latency_ms': np.float64(1966.320013999939), 'p99_latency_ms': np.float64(1975.716586112976), 'total_time_s': 5.885303974151611, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00064, 'quality_std': 0.054780000845711954, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,gemini-1.5-pro,1344.3536400794983,3.445457146125384,0.0,0.0,1.0,0,20,5,1759345898.4512925,0.0128,10240,0.9276983017835836,"{'min_latency_ms': 615.3252124786377, 'max_latency_ms': 1981.612205505371, 'p95_latency_ms': np.float64(1803.935217857361), 'p99_latency_ms': np.float64(1946.0768079757688), 'total_time_s': 5.8047449588775635, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00064, 'quality_std': 0.05905363250623063, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,gemini-1.5-pro,1202.2199511528015,3.696869831400932,0.0,0.0,1.0,0,20,5,1759345907.5707264,0.0128,10240,0.9307740387961949,"{'min_latency_ms': 589.9953842163086, 'max_latency_ms': 1967.3075675964355, 'p95_latency_ms': np.float64(1913.6008977890015), 'p99_latency_ms': np.float64(1956.5662336349487), 'total_time_s': 5.409982204437256, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00064, 'quality_std': 0.04978369465928124, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,gemini-1.5-flash,1053.9512276649475,3.823265280376166,0.0,0.0,1.0,0,20,5,1759345915.0947819,0.007679999999999998,10240,0.8813998853517441,"{'min_latency_ms': -36.76271438598633, 'max_latency_ms': 1967.0710563659668, 'p95_latency_ms': np.float64(1855.4362535476685), 'p99_latency_ms': np.float64(1944.744095802307), 'total_time_s': 5.231130599975586, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0003839999999999999, 'quality_std': 0.050008698196664016, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,gemini-1.5-flash,1155.3911447525024,3.615636866719992,0.0,0.0,1.0,0,20,5,1759345925.0694563,0.007679999999999998,10240,0.9025102091839412,"{'min_latency_ms': 502.6116371154785, 'max_latency_ms': 1947.0453262329102, 'p95_latency_ms': np.float64(1765.414369106293), 'p99_latency_ms': np.float64(1910.7191348075864), 'total_time_s': 5.531528949737549, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0003839999999999999, 'quality_std': 0.059194105459554974, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,gemini-1.5-flash,1217.6612257957458,3.756965086673101,0.0,0.0,1.0,0,20,5,1759345934.1183383,0.007679999999999998,10240,0.8709830012564668,"{'min_latency_ms': 560.8868598937988, 'max_latency_ms': 2007.932424545288, 'p95_latency_ms': np.float64(1776.0017752647402), 'p99_latency_ms': np.float64(1961.5462946891782), 'total_time_s': 5.323445796966553, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0003839999999999999, 'quality_std': 0.052873446152615404, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,gemini-1.5-flash,1351.5228390693665,3.367995990496259,0.0,0.0,1.0,0,20,5,1759345942.2099788,0.007679999999999998,10240,0.872315613940513,"{'min_latency_ms': 689.1014575958252, 'max_latency_ms': 1980.147361755371, 'p95_latency_ms': np.float64(1956.2964797019958), 'p99_latency_ms': np.float64(1975.377185344696), 'total_time_s': 5.938249349594116, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0003839999999999999, 'quality_std': 0.05361394744479093, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,llama-3.1-8b,1306.591236591339,3.3070039261320594,0.0,0.0,1.0,0,20,5,1759345952.8692935,0.002048000000000001,10240,0.7778348786353027,"{'min_latency_ms': 555.4070472717285, 'max_latency_ms': 1988.0244731903076, 'p95_latency_ms': np.float64(1957.3988199234009), 'p99_latency_ms': np.float64(1981.8993425369263), 'total_time_s': 6.047770261764526, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010240000000000006, 'quality_std': 0.05832225784189981, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,llama-3.1-8b,1199.6222853660583,3.634358086220239,0.0,0.0,1.0,0,20,5,1759345963.5152647,0.002048000000000001,10240,0.7696592403957419,"{'min_latency_ms': 541.0621166229248, 'max_latency_ms': 1914.41011428833, 'p95_latency_ms': np.float64(1768.0468797683716), 'p99_latency_ms': np.float64(1885.1374673843382), 'total_time_s': 5.503035068511963, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010240000000000006, 'quality_std': 0.06176209698043544, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,llama-3.1-8b,1143.358552455902,4.173916297150752,0.0,0.0,1.0,0,20,5,1759345973.8406181,0.002048000000000001,10240,0.7857043630038748,"{'min_latency_ms': 631.817102432251, 'max_latency_ms': 1720.1111316680908, 'p95_latency_ms': np.float64(1547.544610500336), 'p99_latency_ms': np.float64(1685.5978274345396), 'total_time_s': 4.791662931442261, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010240000000000006, 'quality_std': 0.06142254552174686, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,llama-3.1-8b,1228.6048531532288,3.613465135130269,0.0,0.0,1.0,0,20,5,1759345982.2759545,0.002048000000000001,10240,0.7706622409066766,"{'min_latency_ms': 539.0913486480713, 'max_latency_ms': 1971.7633724212646, 'p95_latency_ms': np.float64(1819.2362308502197), 'p99_latency_ms': np.float64(1941.2579441070554), 'total_time_s': 5.534853458404541, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010240000000000006, 'quality_std': 0.05320944570994387, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
1,scaling_test,llama-3.1-70b,1424.0724563598633,2.989394263900763,0.0,0.0,1.0,0,20,5,1759345993.4949126,0.008192000000000005,10240,0.8731561293258354,"{'min_latency_ms': 700.6974220275879, 'max_latency_ms': 1959.3937397003174, 'p95_latency_ms': np.float64(1924.493396282196), 'p99_latency_ms': np.float64(1952.4136710166931), 'total_time_s': 6.690318584442139, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00040960000000000025, 'quality_std': 0.0352234743129485, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
6,scaling_test,llama-3.1-70b,1090.003514289856,4.145917207566353,0.0,0.0,1.0,0,20,5,1759346002.3353932,0.008192000000000005,10240,0.8796527768140011,"{'min_latency_ms': 508.23211669921875, 'max_latency_ms': 1798.6392974853516, 'p95_latency_ms': np.float64(1785.5579257011414), 'p99_latency_ms': np.float64(1796.0230231285095), 'total_time_s': 4.824023008346558, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00040960000000000025, 'quality_std': 0.06407982743031454, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
11,scaling_test,llama-3.1-70b,964.3666982650757,4.70392645090585,0.0,0.0,1.0,0,20,5,1759346010.6974216,0.008192000000000005,10240,0.8992009479579495,"{'min_latency_ms': 135.56504249572754, 'max_latency_ms': 1794.3906784057617, 'p95_latency_ms': np.float64(1775.5030393600464), 'p99_latency_ms': np.float64(1790.6131505966187), 'total_time_s': 4.251767158508301, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.4296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00040960000000000025, 'quality_std': 0.050182727925105516, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
16,scaling_test,llama-3.1-70b,1258.9476823806763,3.653831604110515,0.125,0.0,1.0,0,20,5,1759346020.388094,0.008192000000000005,10240,0.8930892849911802,"{'min_latency_ms': 620.0413703918457, 'max_latency_ms': 1916.384220123291, 'p95_latency_ms': np.float64(1765.2448296546936), 'p99_latency_ms': np.float64(1886.1563420295713), 'total_time_s': 5.473706007003784, 'initial_memory_mb': 294.4296875, 'final_memory_mb': 294.5546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00040960000000000025, 'quality_std': 0.04969618373257882, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gpt-4o-mini,1273.702096939087,0.7851086796926611,0.0,0.0,1.0,0,10,1,1759346033.2373884,0.0007680000000000001,5120,0.8342026655690804,"{'min_latency_ms': 741.3482666015625, 'max_latency_ms': 1817.1906471252441, 'p95_latency_ms': np.float64(1794.5520520210266), 'p99_latency_ms': np.float64(1812.6629281044006), 'total_time_s': 12.737090110778809, 'initial_memory_mb': 294.5546875, 'final_memory_mb': 294.5546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.680000000000001e-05, 'quality_std': 0.0446055902590032, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gpt-4o-mini,1511.399483680725,2.933763102440156,0.25,0.0,1.0,0,10,6,1759346036.647214,0.0007680000000000001,5120,0.8471277213854321,"{'min_latency_ms': 800.0023365020752, 'max_latency_ms': 1982.2335243225098, 'p95_latency_ms': np.float64(1942.5656914710999), 'p99_latency_ms': np.float64(1974.2999577522278), 'total_time_s': 3.4085915088653564, 'initial_memory_mb': 294.5546875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.680000000000001e-05, 'quality_std': 0.06432848764341552, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gpt-4o,1150.0491619110107,0.8695228900132853,0.0,0.0,1.0,0,10,1,1759346048.2587333,0.0256,5120,0.9599583095352598,"{'min_latency_ms': 544.191837310791, 'max_latency_ms': 1584.9177837371826, 'p95_latency_ms': np.float64(1511.2051010131834), 'p99_latency_ms': np.float64(1570.1752471923828), 'total_time_s': 11.50055980682373, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00256, 'quality_std': 0.057087428808928614, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gpt-4o,1241.9081926345825,3.22981029743519,0.0,0.0,1.0,0,10,6,1759346051.3563757,0.0256,5120,0.9585199558650109,"{'min_latency_ms': 644.8915004730225, 'max_latency_ms': 1933.1202507019043, 'p95_latency_ms': np.float64(1865.2720570564268), 'p99_latency_ms': np.float64(1919.5506119728088), 'total_time_s': 3.0961570739746094, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00256, 'quality_std': 0.04062204558012218, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gpt-4-turbo,1581.8750381469727,0.6321581179029606,0.0,0.0,1.0,0,10,1,1759346067.3017964,0.0512,5120,0.9324427514695872,"{'min_latency_ms': 833.935022354126, 'max_latency_ms': 2019.5622444152832, 'p95_latency_ms': np.float64(1978.4671545028687), 'p99_latency_ms': np.float64(2011.3432264328003), 'total_time_s': 15.818827152252197, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00512, 'quality_std': 0.04654046504268862, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gpt-4-turbo,1153.432297706604,3.2168993240245847,0.0,0.0,1.0,0,10,6,1759346070.4116762,0.0512,5120,0.9790878168553954,"{'min_latency_ms': 635.2591514587402, 'max_latency_ms': 1833.7628841400146, 'p95_latency_ms': np.float64(1808.298635482788), 'p99_latency_ms': np.float64(1828.6700344085693), 'total_time_s': 3.108583450317383, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00512, 'quality_std': 0.038783270511690816, 'data_size_processed': 1000, 'model_provider': 'gpt'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,claude-3-5-sonnet,1397.6783752441406,0.7154680102707422,0.0,0.0,1.0,0,10,1,1759346084.5017824,0.015359999999999999,5120,0.9421283071854264,"{'min_latency_ms': 532.8092575073242, 'max_latency_ms': 2028.5301208496094, 'p95_latency_ms': np.float64(1968.815779685974), 'p99_latency_ms': np.float64(2016.5872526168823), 'total_time_s': 13.976865291595459, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0015359999999999998, 'quality_std': 0.041911119259679885, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,claude-3-5-sonnet,1215.26198387146,3.6278421983995233,0.0,0.0,1.0,0,10,6,1759346087.2596216,0.015359999999999999,5120,0.9131170426955485,"{'min_latency_ms': 568.2053565979004, 'max_latency_ms': 1612.9648685455322, 'p95_latency_ms': np.float64(1559.6276402473447), 'p99_latency_ms': np.float64(1602.2974228858948), 'total_time_s': 2.7564594745635986, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0015359999999999998, 'quality_std': 0.04319876804321411, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,claude-3-haiku,1299.2276906967163,0.7696826190331395,0.0,0.0,1.0,0,10,1,1759346100.364407,0.0128,5120,0.8252745814485088,"{'min_latency_ms': 668.3671474456787, 'max_latency_ms': 2041.351318359375, 'p95_latency_ms': np.float64(1843.0875778198238), 'p99_latency_ms': np.float64(2001.6985702514648), 'total_time_s': 12.992368221282959, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00128, 'quality_std': 0.058205855327116265, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,claude-3-haiku,1297.508192062378,3.6581654644321087,0.0,0.0,1.0,0,10,6,1759346103.0993996,0.0128,5120,0.8496515913760503,"{'min_latency_ms': 649.4293212890625, 'max_latency_ms': 1873.1675148010254, 'p95_latency_ms': np.float64(1843.8988208770752), 'p99_latency_ms': np.float64(1867.3137760162354), 'total_time_s': 2.7336106300354004, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00128, 'quality_std': 0.06872259975771335, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,claude-3-sonnet,1239.8123741149902,0.8065692205263874,0.0,0.0,1.0,0,10,1,1759346114.9650035,0.07680000000000001,5120,0.8917269647002374,"{'min_latency_ms': 559.9334239959717, 'max_latency_ms': 1828.9196491241455, 'p95_latency_ms': np.float64(1804.089903831482), 'p99_latency_ms': np.float64(1823.9537000656128), 'total_time_s': 12.398191928863525, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000001, 'quality_std': 0.06728256480558785, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,claude-3-sonnet,1325.3875255584717,3.2305613290400945,0.0,0.0,1.0,0,10,6,1759346118.062173,0.07680000000000001,5120,0.8904253939966993,"{'min_latency_ms': 598.4294414520264, 'max_latency_ms': 1956.3815593719482, 'p95_latency_ms': np.float64(1906.8223834037778), 'p99_latency_ms': np.float64(1946.4697241783142), 'total_time_s': 3.0954372882843018, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000001, 'quality_std': 0.06220445402424322, 'data_size_processed': 1000, 'model_provider': 'claude'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gemini-1.5-pro,1264.2754554748535,0.7909630217832475,0.0,0.0,1.0,0,10,1,1759346130.8282964,0.0064,5120,0.8998460053229075,"{'min_latency_ms': 532.9890251159668, 'max_latency_ms': 1795.492172241211, 'p95_latency_ms': np.float64(1745.6329107284544), 'p99_latency_ms': np.float64(1785.5203199386597), 'total_time_s': 12.642816066741943, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00064, 'quality_std': 0.04050886994282564, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gemini-1.5-pro,1342.9006338119507,3.7829150181123015,0.0,0.0,1.0,0,10,6,1759346133.472956,0.0064,5120,0.9029938738274873,"{'min_latency_ms': 701.9498348236084, 'max_latency_ms': 1964.576005935669, 'p95_latency_ms': np.float64(1872.5560665130613), 'p99_latency_ms': np.float64(1946.1720180511475), 'total_time_s': 2.6434640884399414, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00064, 'quality_std': 0.05723923041822323, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gemini-1.5-flash,1368.2588577270508,0.7308515907093506,0.0,0.0,1.0,0,10,1,1759346147.2717574,0.0038399999999999997,5120,0.8795901650694117,"{'min_latency_ms': 620.3913688659668, 'max_latency_ms': 2018.2685852050781, 'p95_latency_ms': np.float64(1993.7742233276367), 'p99_latency_ms': np.float64(2013.3697128295898), 'total_time_s': 13.682668447494507, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00038399999999999996, 'quality_std': 0.05927449072307118, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,gemini-1.5-flash,1207.8629732131958,3.2879592824302044,0.0,0.0,1.0,0,10,6,1759346150.314617,0.0038399999999999997,5120,0.8611774574826484,"{'min_latency_ms': 594.973087310791, 'max_latency_ms': 1811.2657070159912, 'p95_latency_ms': np.float64(1681.6352963447569), 'p99_latency_ms': np.float64(1785.3396248817444), 'total_time_s': 3.041400194168091, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00038399999999999996, 'quality_std': 0.07904328865026665, 'data_size_processed': 1000, 'model_provider': 'gemini'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,llama-3.1-8b,1144.2910194396973,0.8738903631276332,0.0,0.0,1.0,0,10,1,1759346161.882389,0.0010240000000000002,5120,0.7805684315735588,"{'min_latency_ms': 594.846248626709, 'max_latency_ms': 1759.0994834899902, 'p95_latency_ms': np.float64(1631.7564606666563), 'p99_latency_ms': np.float64(1733.6308789253235), 'total_time_s': 11.443083047866821, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010240000000000002, 'quality_std': 0.0613021253594286, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,llama-3.1-8b,1128.666615486145,3.527006383973853,0.0,0.0,1.0,0,10,6,1759346164.7190907,0.0010240000000000002,5120,0.7915276538063776,"{'min_latency_ms': 610.3026866912842, 'max_latency_ms': 1934.2899322509766, 'p95_latency_ms': np.float64(1909.2738270759583), 'p99_latency_ms': np.float64(1929.286711215973), 'total_time_s': 2.835265636444092, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010240000000000002, 'quality_std': 0.055242108041169316, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,llama-3.1-70b,1341.410732269287,0.7454805363345477,0.0,0.0,1.0,0,10,1,1759346178.2571824,0.004096000000000001,5120,0.8513858389112968,"{'min_latency_ms': 566.3845539093018, 'max_latency_ms': 1769.1750526428223, 'p95_latency_ms': np.float64(1743.9924359321594), 'p99_latency_ms': np.float64(1764.1385293006897), 'total_time_s': 13.414166450500488, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004096000000000001, 'quality_std': 0.06286695897481548, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,concurrent_test,llama-3.1-70b,1410.3811264038086,3.52022788340447,0.0,0.0,1.0,0,10,6,1759346181.0992308,0.004096000000000001,5120,0.8534058400920448,"{'min_latency_ms': 572.9773044586182, 'max_latency_ms': 1928.0850887298584, 'p95_latency_ms': np.float64(1903.529143333435), 'p99_latency_ms': np.float64(1923.1738996505737), 'total_time_s': 2.8407251834869385, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004096000000000001, 'quality_std': 0.059750620144052545, 'data_size_processed': 1000, 'model_provider': 'llama'}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4o-mini,1177.2440481185913,3.97501008701798,0.0,0.0,1.0,0,50,5,1759346193.7901201,0.0038400000000000023,25600,0.8512259391579574,"{'min_latency_ms': 537.5485420227051, 'max_latency_ms': 2001.0862350463867, 'p95_latency_ms': np.float64(1892.5400853157041), 'p99_latency_ms': np.float64(1985.4257130622864), 'total_time_s': 12.578584432601929, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.680000000000005e-05, 'quality_std': 0.0581968026848211, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4o-mini,1229.8026752471924,3.9282369679460363,0.0,0.0,1.0,0,50,5,1759346206.6300905,0.0038400000000000023,25600,0.8537868196468017,"{'min_latency_ms': 518.6026096343994, 'max_latency_ms': 1944.331407546997, 'p95_latency_ms': np.float64(1909.6850633621214), 'p99_latency_ms': np.float64(1940.652117729187), 'total_time_s': 12.72835636138916, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.680000000000005e-05, 'quality_std': 0.05181407518487485, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4o-mini,1274.8144483566284,3.7483119966709824,0.0,0.0,1.0,0,50,5,1759346220.0900073,0.0038400000000000023,25600,0.8487480924622282,"{'min_latency_ms': 529.292106628418, 'max_latency_ms': 1996.4158535003662, 'p95_latency_ms': np.float64(1960.6919050216675), 'p99_latency_ms': np.float64(1988.2149648666382), 'total_time_s': 13.339337825775146, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 7.680000000000005e-05, 'quality_std': 0.05812899461310237, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4o,1174.5057010650635,4.0514136389986115,0.0,0.0,1.0,0,50,5,1759346232.557784,0.12800000000000017,25600,0.9484191580718665,"{'min_latency_ms': 286.58127784729004, 'max_latency_ms': 1877.345085144043, 'p95_latency_ms': np.float64(1735.1435780525208), 'p99_latency_ms': np.float64(1842.000467777252), 'total_time_s': 12.341371297836304, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.8046875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0025600000000000032, 'quality_std': 0.0491398572941036, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4o,1225.388593673706,3.875932429633176,0.125,0.0,1.0,0,50,5,1759346245.5669534,0.12800000000000017,25600,0.9557179217710832,"{'min_latency_ms': 514.6803855895996, 'max_latency_ms': 2034.6620082855225, 'p95_latency_ms': np.float64(1909.4360709190366), 'p99_latency_ms': np.float64(2010.34743309021), 'total_time_s': 12.900121688842773, 'initial_memory_mb': 294.8046875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0025600000000000032, 'quality_std': 0.04870463047338363, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4o,1244.0021991729736,3.7266446101546777,0.0,0.0,1.0,0,50,5,1759346259.1414776,0.12800000000000017,25600,0.9458944372937584,"{'min_latency_ms': 521.9912528991699, 'max_latency_ms': 1986.6855144500732, 'p95_latency_ms': np.float64(1953.3554077148438), 'p99_latency_ms': np.float64(1978.9683985710144), 'total_time_s': 13.416895151138306, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0025600000000000032, 'quality_std': 0.04851286804634898, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4-turbo,1181.3615322113037,4.124998416603219,0.0,0.0,1.0,0,50,5,1759346271.374578,0.25600000000000034,25600,0.9651345363111258,"{'min_latency_ms': 353.2071113586426, 'max_latency_ms': 1966.524362564087, 'p95_latency_ms': np.float64(1945.0057744979858), 'p99_latency_ms': np.float64(1965.7717752456665), 'total_time_s': 12.121216773986816, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0051200000000000065, 'quality_std': 0.04338778763022959, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4-turbo,1291.4055681228638,3.77552400952112,0.0,0.0,1.0,0,50,5,1759346284.731812,0.25600000000000034,25600,0.9689389907566063,"{'min_latency_ms': 555.095911026001, 'max_latency_ms': 2027.0910263061523, 'p95_latency_ms': np.float64(1966.5393114089964), 'p99_latency_ms': np.float64(2018.9284563064575), 'total_time_s': 13.243194818496704, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0051200000000000065, 'quality_std': 0.04154143035607859, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gpt-4-turbo,1261.4208269119263,3.663208321130074,0.0,0.0,1.0,0,50,5,1759346298.4905493,0.25600000000000034,25600,0.9573488473081913,"{'min_latency_ms': 284.8320007324219, 'max_latency_ms': 2011.866807937622, 'p95_latency_ms': np.float64(1975.5298137664795), 'p99_latency_ms': np.float64(2000.7115292549133), 'total_time_s': 13.649237394332886, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0051200000000000065, 'quality_std': 0.04380501534660363, 'data_size_processed': 1000, 'model_provider': 'gpt', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-5-sonnet,1270.3543138504028,3.7944320989090614,0.0,0.0,1.0,0,50,5,1759346311.7936022,0.07680000000000001,25600,0.948463600922609,"{'min_latency_ms': 622.9770183563232, 'max_latency_ms': 1970.0510501861572, 'p95_latency_ms': np.float64(1868.455410003662), 'p99_latency_ms': np.float64(1957.5506472587585), 'total_time_s': 13.177202463150024, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.001536, 'quality_std': 0.04872900892927657, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-5-sonnet,1154.527621269226,4.107802148818313,0.0,0.0,1.0,0,50,5,1759346324.0782034,0.07680000000000001,25600,0.9535056752128789,"{'min_latency_ms': 526.8404483795166, 'max_latency_ms': 1841.3877487182617, 'p95_latency_ms': np.float64(1815.3946280479431), 'p99_latency_ms': np.float64(1837.1384692192078), 'total_time_s': 12.171959161758423, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.001536, 'quality_std': 0.04600056992617095, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-5-sonnet,1341.6658163070679,3.5050325493977805,0.0,0.0,1.0,0,50,5,1759346338.4560573,0.07680000000000001,25600,0.947231761746643,"{'min_latency_ms': 607.1841716766357, 'max_latency_ms': 1968.3496952056885, 'p95_latency_ms': np.float64(1938.420307636261), 'p99_latency_ms': np.float64(1963.8122081756592), 'total_time_s': 14.265202760696411, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.001536, 'quality_std': 0.0468041040494112, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-haiku,1268.9041805267334,3.6527405734902607,0.125,0.0,1.0,0,50,5,1759346352.2760284,0.06400000000000008,25600,0.8657832919908838,"{'min_latency_ms': 576.9007205963135, 'max_latency_ms': 1978.3263206481934, 'p95_latency_ms': np.float64(1900.9657382965088), 'p99_latency_ms': np.float64(1977.4397349357605), 'total_time_s': 13.688352346420288, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0012800000000000016, 'quality_std': 0.05791027367020173, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-haiku,1273.6989831924438,3.7602543777430877,0.0,0.0,1.0,0,50,5,1759346365.681829,0.06400000000000008,25600,0.8396294693060197,"{'min_latency_ms': 521.7316150665283, 'max_latency_ms': 1988.7199401855469, 'p95_latency_ms': np.float64(1945.9344744682312), 'p99_latency_ms': np.float64(1987.1683859825134), 'total_time_s': 13.296972751617432, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0012800000000000016, 'quality_std': 0.06291349263235946, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-haiku,1234.9269914627075,3.9335082345318124,0.0,0.0,1.0,0,50,5,1759346378.5192664,0.06400000000000008,25600,0.8469784358915146,"{'min_latency_ms': 529.503345489502, 'max_latency_ms': 1981.7008972167969, 'p95_latency_ms': np.float64(1859.1547846794128), 'p99_latency_ms': np.float64(1963.3227896690369), 'total_time_s': 12.711299180984497, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0012800000000000016, 'quality_std': 0.061722943046806616, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-sonnet,1195.9008169174194,4.06962738382444,0.0,0.0,1.0,0,50,5,1759346390.9144897,0.3840000000000003,25600,0.9026531444228556,"{'min_latency_ms': -36.6673469543457, 'max_latency_ms': 1991.610050201416, 'p95_latency_ms': np.float64(1819.4202184677124), 'p99_latency_ms': np.float64(1987.222683429718), 'total_time_s': 12.286137104034424, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000005, 'quality_std': 0.058229589360407986, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-sonnet,1372.0379829406738,3.502253345465805,0.0,0.0,1.0,0,50,5,1759346405.3043494,0.3840000000000003,25600,0.8837364473272626,"{'min_latency_ms': 543.1270599365234, 'max_latency_ms': 1992.779016494751, 'p95_latency_ms': np.float64(1931.822681427002), 'p99_latency_ms': np.float64(1987.4089169502258), 'total_time_s': 14.276522874832153, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000005, 'quality_std': 0.05634614113838598, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,claude-3-sonnet,1257.2709035873413,3.7764857062182706,0.0,0.0,1.0,0,50,5,1759346418.6521854,0.3840000000000003,25600,0.9053414058751514,"{'min_latency_ms': 529.8404693603516, 'max_latency_ms': 1990.1280403137207, 'p95_latency_ms': np.float64(1911.1806631088257), 'p99_latency_ms': np.float64(1976.6331052780151), 'total_time_s': 13.239822387695312, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000005, 'quality_std': 0.050506656009957705, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gemini-1.5-pro,1221.5951490402222,3.8372908969845323,0.0,0.0,1.0,0,50,5,1759346431.7921565,0.03200000000000004,25600,0.9365925291921394,"{'min_latency_ms': 329.1811943054199, 'max_latency_ms': 1995.384693145752, 'p95_latency_ms': np.float64(1965.0332808494568), 'p99_latency_ms': np.float64(1988.3063769340515), 'total_time_s': 13.030025959014893, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0006400000000000008, 'quality_std': 0.04847128641002876, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gemini-1.5-pro,1351.8355464935303,3.6227975436552606,0.0,0.0,1.0,0,50,5,1759346445.7126448,0.03200000000000004,25600,0.9323552590826123,"{'min_latency_ms': 515.129566192627, 'max_latency_ms': 2008.0702304840088, 'p95_latency_ms': np.float64(1958.6564779281616), 'p99_latency_ms': np.float64(2004.1296029090881), 'total_time_s': 13.801488876342773, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0006400000000000008, 'quality_std': 0.055840796126395656, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gemini-1.5-pro,1240.622534751892,3.8813384098374453,0.0,0.0,1.0,0,50,5,1759346458.7192729,0.03200000000000004,25600,0.9407390543744837,"{'min_latency_ms': -29.146671295166016, 'max_latency_ms': 1934.4398975372314, 'p95_latency_ms': np.float64(1849.7230291366577), 'p99_latency_ms': np.float64(1918.0084466934204), 'total_time_s': 12.8821542263031, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0006400000000000008, 'quality_std': 0.050597003908357786, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gemini-1.5-flash,1237.6702642440796,3.812923495644346,0.0,0.0,1.0,0,50,5,1759346471.9588974,0.019200000000000002,25600,0.8556073429019542,"{'min_latency_ms': 536.4787578582764, 'max_latency_ms': 2010.1728439331055, 'p95_latency_ms': np.float64(1911.8669629096985), 'p99_latency_ms': np.float64(1976.080708503723), 'total_time_s': 13.113297462463379, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.000384, 'quality_std': 0.06082135675952047, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gemini-1.5-flash,1180.0980806350708,4.016049090832003,0.0,0.0,1.0,0,50,5,1759346484.5327744,0.019200000000000002,25600,0.8718428063415768,"{'min_latency_ms': 109.58051681518555, 'max_latency_ms': 1993.358850479126, 'p95_latency_ms': np.float64(1872.3165988922117), 'p99_latency_ms': np.float64(1992.416422367096), 'total_time_s': 12.450047016143799, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.000384, 'quality_std': 0.0613916834940056, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,gemini-1.5-flash,1194.4490098953247,4.009936119483076,0.0,0.0,1.0,0,50,5,1759346497.1201088,0.019200000000000002,25600,0.8652112059805899,"{'min_latency_ms': 520.3211307525635, 'max_latency_ms': 1942.4259662628174, 'p95_latency_ms': np.float64(1834.6370577812195), 'p99_latency_ms': np.float64(1890.3984904289243), 'total_time_s': 12.469026565551758, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.000384, 'quality_std': 0.05312368368226588, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,llama-3.1-8b,1306.2016773223877,3.683763547696555,0.0,0.0,1.0,0,50,5,1759346510.812732,0.005119999999999998,25600,0.7727309350554936,"{'min_latency_ms': 527.4953842163086, 'max_latency_ms': 1997.086524963379, 'p95_latency_ms': np.float64(1942.7793741226194), 'p99_latency_ms': np.float64(1994.0643763542175), 'total_time_s': 13.573075294494629, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010239999999999995, 'quality_std': 0.05596283861854901, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,llama-3.1-8b,1304.1251468658447,3.617383744773005,0.0,0.0,1.0,0,50,5,1759346524.7711937,0.005119999999999998,25600,0.785787220179362,"{'min_latency_ms': 112.00571060180664, 'max_latency_ms': 2015.146255493164, 'p95_latency_ms': np.float64(2001.4938592910767), 'p99_latency_ms': np.float64(2012.321424484253), 'total_time_s': 13.822144269943237, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010239999999999995, 'quality_std': 0.0552285639827787, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,llama-3.1-8b,1290.5346298217773,3.671522710311051,0.0,0.0,1.0,0,50,5,1759346538.5084107,0.005119999999999998,25600,0.7771978709125356,"{'min_latency_ms': 565.7510757446289, 'max_latency_ms': 1945.1093673706055, 'p95_latency_ms': np.float64(1906.785237789154), 'p99_latency_ms': np.float64(1942.4526476860046), 'total_time_s': 13.618327856063843, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010239999999999995, 'quality_std': 0.057252814774054535, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,llama-3.1-70b,1213.9334726333618,3.947675276737486,0.0,0.0,1.0,0,50,5,1759346551.2951744,0.02047999999999999,25600,0.8683286341213061,"{'min_latency_ms': -79.86569404602051, 'max_latency_ms': 2014.9149894714355, 'p95_latency_ms': np.float64(1919.9433565139768), 'p99_latency_ms': np.float64(1992.4925136566162), 'total_time_s': 12.665682077407837, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004095999999999998, 'quality_std': 0.05862810413022958, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 0}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,llama-3.1-70b,1298.1958770751953,3.7049711897976763,0.0,0.0,1.0,0,50,5,1759346564.9280033,0.02047999999999999,25600,0.8889975698232048,"{'min_latency_ms': 503.5574436187744, 'max_latency_ms': 2020.4124450683594, 'p95_latency_ms': np.float64(1901.4497756958008), 'p99_latency_ms': np.float64(1986.3133001327512), 'total_time_s': 13.495381593704224, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004095999999999998, 'quality_std': 0.053463278827038344, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 1}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
5,memory_test,llama-3.1-70b,1187.040138244629,4.165139112812611,0.0,0.0,1.0,0,50,5,1759346577.0467978,0.02047999999999999,25600,0.8884529182459214,"{'min_latency_ms': 506.2377452850342, 'max_latency_ms': 2026.6106128692627, 'p95_latency_ms': np.float64(1958.3556652069092), 'p99_latency_ms': np.float64(2007.5032830238342), 'total_time_s': 12.004400968551636, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004095999999999998, 'quality_std': 0.05625669416735748, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 2}",0.0,0.0,0.0,0.0,0,False,0,0.0,0,0.0,False
73 5 memory_test claude-3-5-sonnet 1341.6658163070679 3.5050325493977805 0.0 0.0 1.0 0 50 5 1759346338.4560573 0.07680000000000001 25600 0.947231761746643 {'min_latency_ms': 607.1841716766357, 'max_latency_ms': 1968.3496952056885, 'p95_latency_ms': np.float64(1938.420307636261), 'p99_latency_ms': np.float64(1963.8122081756592), 'total_time_s': 14.265202760696411, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 294.9296875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.001536, 'quality_std': 0.0468041040494112, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 2} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
74 5 memory_test claude-3-haiku 1268.9041805267334 3.6527405734902607 0.125 0.0 1.0 0 50 5 1759346352.2760284 0.06400000000000008 25600 0.8657832919908838 {'min_latency_ms': 576.9007205963135, 'max_latency_ms': 1978.3263206481934, 'p95_latency_ms': np.float64(1900.9657382965088), 'p99_latency_ms': np.float64(1977.4397349357605), 'total_time_s': 13.688352346420288, 'initial_memory_mb': 294.9296875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0012800000000000016, 'quality_std': 0.05791027367020173, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 0} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
75 5 memory_test claude-3-haiku 1273.6989831924438 3.7602543777430877 0.0 0.0 1.0 0 50 5 1759346365.681829 0.06400000000000008 25600 0.8396294693060197 {'min_latency_ms': 521.7316150665283, 'max_latency_ms': 1988.7199401855469, 'p95_latency_ms': np.float64(1945.9344744682312), 'p99_latency_ms': np.float64(1987.1683859825134), 'total_time_s': 13.296972751617432, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0012800000000000016, 'quality_std': 0.06291349263235946, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 1} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
76 5 memory_test claude-3-haiku 1234.9269914627075 3.9335082345318124 0.0 0.0 1.0 0 50 5 1759346378.5192664 0.06400000000000008 25600 0.8469784358915146 {'min_latency_ms': 529.503345489502, 'max_latency_ms': 1981.7008972167969, 'p95_latency_ms': np.float64(1859.1547846794128), 'p99_latency_ms': np.float64(1963.3227896690369), 'total_time_s': 12.711299180984497, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0012800000000000016, 'quality_std': 0.061722943046806616, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 2} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
77 5 memory_test claude-3-sonnet 1195.9008169174194 4.06962738382444 0.0 0.0 1.0 0 50 5 1759346390.9144897 0.3840000000000003 25600 0.9026531444228556 {'min_latency_ms': -36.6673469543457, 'max_latency_ms': 1991.610050201416, 'p95_latency_ms': np.float64(1819.4202184677124), 'p99_latency_ms': np.float64(1987.222683429718), 'total_time_s': 12.286137104034424, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000005, 'quality_std': 0.058229589360407986, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 0} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
78 5 memory_test claude-3-sonnet 1372.0379829406738 3.502253345465805 0.0 0.0 1.0 0 50 5 1759346405.3043494 0.3840000000000003 25600 0.8837364473272626 {'min_latency_ms': 543.1270599365234, 'max_latency_ms': 1992.779016494751, 'p95_latency_ms': np.float64(1931.822681427002), 'p99_latency_ms': np.float64(1987.4089169502258), 'total_time_s': 14.276522874832153, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000005, 'quality_std': 0.05634614113838598, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 1} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
79 5 memory_test claude-3-sonnet 1257.2709035873413 3.7764857062182706 0.0 0.0 1.0 0 50 5 1759346418.6521854 0.3840000000000003 25600 0.9053414058751514 {'min_latency_ms': 529.8404693603516, 'max_latency_ms': 1990.1280403137207, 'p95_latency_ms': np.float64(1911.1806631088257), 'p99_latency_ms': np.float64(1976.6331052780151), 'total_time_s': 13.239822387695312, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.007680000000000005, 'quality_std': 0.050506656009957705, 'data_size_processed': 1000, 'model_provider': 'claude', 'iteration': 2} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
80 5 memory_test gemini-1.5-pro 1221.5951490402222 3.8372908969845323 0.0 0.0 1.0 0 50 5 1759346431.7921565 0.03200000000000004 25600 0.9365925291921394 {'min_latency_ms': 329.1811943054199, 'max_latency_ms': 1995.384693145752, 'p95_latency_ms': np.float64(1965.0332808494568), 'p99_latency_ms': np.float64(1988.3063769340515), 'total_time_s': 13.030025959014893, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0006400000000000008, 'quality_std': 0.04847128641002876, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 0} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
81 5 memory_test gemini-1.5-pro 1351.8355464935303 3.6227975436552606 0.0 0.0 1.0 0 50 5 1759346445.7126448 0.03200000000000004 25600 0.9323552590826123 {'min_latency_ms': 515.129566192627, 'max_latency_ms': 2008.0702304840088, 'p95_latency_ms': np.float64(1958.6564779281616), 'p99_latency_ms': np.float64(2004.1296029090881), 'total_time_s': 13.801488876342773, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0006400000000000008, 'quality_std': 0.055840796126395656, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 1} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
82 5 memory_test gemini-1.5-pro 1240.622534751892 3.8813384098374453 0.0 0.0 1.0 0 50 5 1759346458.7192729 0.03200000000000004 25600 0.9407390543744837 {'min_latency_ms': -29.146671295166016, 'max_latency_ms': 1934.4398975372314, 'p95_latency_ms': np.float64(1849.7230291366577), 'p99_latency_ms': np.float64(1918.0084466934204), 'total_time_s': 12.8821542263031, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0006400000000000008, 'quality_std': 0.050597003908357786, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 2} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
83 5 memory_test gemini-1.5-flash 1237.6702642440796 3.812923495644346 0.0 0.0 1.0 0 50 5 1759346471.9588974 0.019200000000000002 25600 0.8556073429019542 {'min_latency_ms': 536.4787578582764, 'max_latency_ms': 2010.1728439331055, 'p95_latency_ms': np.float64(1911.8669629096985), 'p99_latency_ms': np.float64(1976.080708503723), 'total_time_s': 13.113297462463379, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.000384, 'quality_std': 0.06082135675952047, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 0} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
84 5 memory_test gemini-1.5-flash 1180.0980806350708 4.016049090832003 0.0 0.0 1.0 0 50 5 1759346484.5327744 0.019200000000000002 25600 0.8718428063415768 {'min_latency_ms': 109.58051681518555, 'max_latency_ms': 1993.358850479126, 'p95_latency_ms': np.float64(1872.3165988922117), 'p99_latency_ms': np.float64(1992.416422367096), 'total_time_s': 12.450047016143799, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.000384, 'quality_std': 0.0613916834940056, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 1} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
85 5 memory_test gemini-1.5-flash 1194.4490098953247 4.009936119483076 0.0 0.0 1.0 0 50 5 1759346497.1201088 0.019200000000000002 25600 0.8652112059805899 {'min_latency_ms': 520.3211307525635, 'max_latency_ms': 1942.4259662628174, 'p95_latency_ms': np.float64(1834.6370577812195), 'p99_latency_ms': np.float64(1890.3984904289243), 'total_time_s': 12.469026565551758, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.000384, 'quality_std': 0.05312368368226588, 'data_size_processed': 1000, 'model_provider': 'gemini', 'iteration': 2} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
86 5 memory_test llama-3.1-8b 1306.2016773223877 3.683763547696555 0.0 0.0 1.0 0 50 5 1759346510.812732 0.005119999999999998 25600 0.7727309350554936 {'min_latency_ms': 527.4953842163086, 'max_latency_ms': 1997.086524963379, 'p95_latency_ms': np.float64(1942.7793741226194), 'p99_latency_ms': np.float64(1994.0643763542175), 'total_time_s': 13.573075294494629, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010239999999999995, 'quality_std': 0.05596283861854901, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 0} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
87 5 memory_test llama-3.1-8b 1304.1251468658447 3.617383744773005 0.0 0.0 1.0 0 50 5 1759346524.7711937 0.005119999999999998 25600 0.785787220179362 {'min_latency_ms': 112.00571060180664, 'max_latency_ms': 2015.146255493164, 'p95_latency_ms': np.float64(2001.4938592910767), 'p99_latency_ms': np.float64(2012.321424484253), 'total_time_s': 13.822144269943237, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010239999999999995, 'quality_std': 0.0552285639827787, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 1} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
88 5 memory_test llama-3.1-8b 1290.5346298217773 3.671522710311051 0.0 0.0 1.0 0 50 5 1759346538.5084107 0.005119999999999998 25600 0.7771978709125356 {'min_latency_ms': 565.7510757446289, 'max_latency_ms': 1945.1093673706055, 'p95_latency_ms': np.float64(1906.785237789154), 'p99_latency_ms': np.float64(1942.4526476860046), 'total_time_s': 13.618327856063843, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.00010239999999999995, 'quality_std': 0.057252814774054535, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 2} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
89 5 memory_test llama-3.1-70b 1213.9334726333618 3.947675276737486 0.0 0.0 1.0 0 50 5 1759346551.2951744 0.02047999999999999 25600 0.8683286341213061 {'min_latency_ms': -79.86569404602051, 'max_latency_ms': 2014.9149894714355, 'p95_latency_ms': np.float64(1919.9433565139768), 'p99_latency_ms': np.float64(1992.4925136566162), 'total_time_s': 12.665682077407837, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004095999999999998, 'quality_std': 0.05862810413022958, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 0} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
90 5 memory_test llama-3.1-70b 1298.1958770751953 3.7049711897976763 0.0 0.0 1.0 0 50 5 1759346564.9280033 0.02047999999999999 25600 0.8889975698232048 {'min_latency_ms': 503.5574436187744, 'max_latency_ms': 2020.4124450683594, 'p95_latency_ms': np.float64(1901.4497756958008), 'p99_latency_ms': np.float64(1986.3133001327512), 'total_time_s': 13.495381593704224, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004095999999999998, 'quality_std': 0.053463278827038344, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 1} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False
91 5 memory_test llama-3.1-70b 1187.040138244629 4.165139112812611 0.0 0.0 1.0 0 50 5 1759346577.0467978 0.02047999999999999 25600 0.8884529182459214 {'min_latency_ms': 506.2377452850342, 'max_latency_ms': 2026.6106128692627, 'p95_latency_ms': np.float64(1958.3556652069092), 'p99_latency_ms': np.float64(2007.5032830238342), 'total_time_s': 12.004400968551636, 'initial_memory_mb': 295.0546875, 'final_memory_mb': 295.0546875, 'avg_tokens_per_request': 512.0, 'cost_per_request': 0.0004095999999999998, 'quality_std': 0.05625669416735748, 'data_size_processed': 1000, 'model_provider': 'llama', 'iteration': 2} 0.0 0.0 0.0 0.0 0 False 0 0.0 0 0.0 False

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +1,15 @@
swarms
pytest
matplotlib
loguru
psutil
pyyaml
python-dotenv
rich
pydantic
numpy
pandas
openpyxl
seaborn
requests
swarms-memory

@ -1,64 +0,0 @@
from swarms.structs.agent import Agent
from swarms.structs.multi_agent_router import MultiAgentRouter
# Example usage:
agents = [
Agent(
agent_name="ResearchAgent",
agent_description="Specializes in researching topics and providing detailed, factual information",
system_prompt="You are a research specialist. Provide detailed, well-researched information about any topic, citing sources when possible.",
max_loops=1,
),
Agent(
agent_name="CodeExpertAgent",
agent_description="Expert in writing, reviewing, and explaining code across multiple programming languages",
system_prompt="You are a coding expert. Write, review, and explain code with a focus on best practices and clean code principles.",
max_loops=1,
),
Agent(
agent_name="WritingAgent",
agent_description="Skilled in creative and technical writing, content creation, and editing",
system_prompt="You are a writing specialist. Create, edit, and improve written content while maintaining appropriate tone and style.",
max_loops=1,
),
]
models_to_test = [
"gpt-4.1",
"gpt-4o",
"gpt-5-mini",
"o4-mini",
"o3",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514",
"claude-3-7-sonnet-20250219",
"gemini/gemini-2.5-flash",
"gemini/gemini-2.5-pro",
]
task = "Use all the agents available to you to remake the Fibonacci function in Python, providing both an explanation and code."
model_logs = []
for model_name in models_to_test:
print(f"\n--- Testing model: {model_name} ---")
router_execute = MultiAgentRouter(
agents=agents,
temperature=0.5,
model=model_name,
)
try:
result = router_execute.run(task)
print(f"Run completed successfully for {model_name}")
model_logs.append(
{"model": model_name, "status": "✅ Success"}
)
except Exception as e:
print(f"An error occurred for {model_name}")
model_logs.append(
{"model": model_name, "status": f"❌ Error: {e}"}
)
print("\n===== Model Run Summary =====")
for log in model_logs:
print(f"{log['model']}: {log['status']}")

File diff suppressed because it is too large

@ -1,600 +0,0 @@
import asyncio
import json
import os
import tempfile
import time
import yaml
from swarm_models import OpenAIChat
from swarms import Agent
def test_basic_agent_functionality():
"""Test basic agent initialization and simple task execution"""
print("\nTesting basic agent functionality...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(agent_name="Test-Agent", llm=model, max_loops=1)
response = agent.run("What is 2+2?")
assert response is not None, "Agent response should not be None"
# Test agent properties
assert (
agent.agent_name == "Test-Agent"
), "Agent name not set correctly"
assert agent.max_loops == 1, "Max loops not set correctly"
assert agent.llm is not None, "LLM not initialized"
print("✓ Basic agent functionality test passed")
def test_memory_management():
"""Test agent memory management functionality"""
print("\nTesting memory management...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Memory-Test-Agent",
llm=model,
max_loops=1,
context_length=8192,
)
# Test adding to memory
agent.add_memory("Test memory entry")
assert (
"Test memory entry"
in agent.short_memory.return_history_as_string()
)
# Test memory query
agent.memory_query("Test query")
# Test token counting
tokens = agent.check_available_tokens()
assert isinstance(tokens, int), "Token count should be an integer"
print("✓ Memory management test passed")
def test_agent_output_formats():
"""Test all available output formats"""
print("\nTesting all output formats...")
model = OpenAIChat(model_name="gpt-4.1")
test_task = "Say hello!"
output_types = {
"str": str,
"string": str,
"list": str, # JSON string containing list
"json": str, # JSON string
"dict": dict,
"yaml": str,
}
for output_type, expected_type in output_types.items():
agent = Agent(
agent_name=f"{output_type.capitalize()}-Output-Agent",
llm=model,
max_loops=1,
output_type=output_type,
)
response = agent.run(test_task)
assert (
response is not None
), f"{output_type} output should not be None"
if output_type == "yaml":
# Verify YAML can be parsed
try:
yaml.safe_load(response)
print(f"{output_type} output valid")
except yaml.YAMLError:
assert False, f"Invalid YAML output for {output_type}"
elif output_type in ["json", "list"]:
# Verify JSON can be parsed
try:
json.loads(response)
print(f"{output_type} output valid")
except json.JSONDecodeError:
assert False, f"Invalid JSON output for {output_type}"
print("✓ Output formats test passed")
def test_agent_state_management():
"""Test comprehensive state management functionality"""
print("\nTesting state management...")
model = OpenAIChat(model_name="gpt-4.1")
# Create temporary directory for test files
with tempfile.TemporaryDirectory() as temp_dir:
state_path = os.path.join(temp_dir, "agent_state.json")
# Create agent with initial state
agent1 = Agent(
agent_name="State-Test-Agent",
llm=model,
max_loops=1,
saved_state_path=state_path,
)
# Add some data to the agent
agent1.run("Remember this: Test message 1")
agent1.add_memory("Test message 2")
# Save state
agent1.save()
assert os.path.exists(state_path), "State file not created"
# Create new agent and load state
agent2 = Agent(
agent_name="State-Test-Agent", llm=model, max_loops=1
)
agent2.load(state_path)
# Verify state loaded correctly
history2 = agent2.short_memory.return_history_as_string()
assert (
"Test message 1" in history2
), "State not loaded correctly"
assert (
"Test message 2" in history2
), "Memory not loaded correctly"
# Test autosave functionality
agent3 = Agent(
agent_name="Autosave-Test-Agent",
llm=model,
max_loops=1,
saved_state_path=os.path.join(
temp_dir, "autosave_state.json"
),
autosave=True,
)
agent3.run("Test autosave")
time.sleep(2) # Wait for autosave
assert os.path.exists(
os.path.join(temp_dir, "autosave_state.json")
), "Autosave file not created"
print("✓ State management test passed")
def test_agent_tools_and_execution():
"""Test agent tool handling and execution"""
print("\nTesting tools and execution...")
def sample_tool(x: int, y: int) -> int:
"""Sample tool that adds two numbers"""
return x + y
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Tools-Test-Agent",
llm=model,
max_loops=1,
tools=[sample_tool],
)
# Test adding tools
agent.add_tool(lambda x: x * 2)
assert len(agent.tools) == 2, "Tool not added correctly"
# Test removing tools
agent.remove_tool(sample_tool)
assert len(agent.tools) == 1, "Tool not removed correctly"
# Test tool execution
response = agent.run("Calculate 2 + 2 using the sample tool")
assert response is not None, "Tool execution failed"
print("✓ Tools and execution test passed")
def test_agent_concurrent_execution():
"""Test agent concurrent execution capabilities"""
print("\nTesting concurrent execution...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Concurrent-Test-Agent", llm=model, max_loops=1
)
# Test bulk run
tasks = [
{"task": "Count to 3"},
{"task": "Say hello"},
{"task": "Tell a short joke"},
]
responses = agent.bulk_run(tasks)
assert len(responses) == len(tasks), "Not all tasks completed"
assert all(
response is not None for response in responses
), "Some tasks failed"
# Test concurrent tasks
concurrent_responses = agent.run_concurrent_tasks(
["Task 1", "Task 2", "Task 3"]
)
assert (
len(concurrent_responses) == 3
), "Not all concurrent tasks completed"
print("✓ Concurrent execution test passed")
def test_agent_error_handling():
"""Test agent error handling and recovery"""
print("\nTesting error handling...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Error-Test-Agent",
llm=model,
max_loops=1,
retry_attempts=3,
retry_interval=1,
)
# Test invalid tool execution
try:
agent.parse_and_execute_tools("invalid_json")
print("✓ Invalid tool execution handled")
except Exception:
assert True, "Expected error caught"
# Test recovery after error
response = agent.run("Continue after error")
assert response is not None, "Agent failed to recover after error"
print("✓ Error handling test passed")
def test_agent_configuration():
"""Test agent configuration and parameters"""
print("\nTesting agent configuration...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Config-Test-Agent",
llm=model,
max_loops=1,
temperature=0.7,
max_tokens=4000,
context_length=8192,
)
# Test configuration methods
agent.update_system_prompt("New system prompt")
agent.update_max_loops(2)
agent.update_loop_interval(2)
# Verify updates
assert agent.max_loops == 2, "Max loops not updated"
assert agent.loop_interval == 2, "Loop interval not updated"
# Test configuration export
config_dict = agent.to_dict()
assert isinstance(
config_dict, dict
), "Configuration export failed"
# Test YAML export
yaml_config = agent.to_yaml()
assert isinstance(yaml_config, str), "YAML export failed"
print("✓ Configuration test passed")
def test_agent_with_stopping_condition():
"""Test agent with custom stopping condition"""
print("\nTesting agent with stopping condition...")
def custom_stopping_condition(response: str) -> bool:
return "STOP" in response.upper()
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Stopping-Condition-Agent",
llm=model,
max_loops=5,
stopping_condition=custom_stopping_condition,
)
response = agent.run("Count up until you see the word STOP")
assert response is not None, "Stopping condition test failed"
print("✓ Stopping condition test passed")
def test_agent_with_retry_mechanism():
"""Test agent retry mechanism"""
print("\nTesting agent retry mechanism...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Retry-Test-Agent",
llm=model,
max_loops=1,
retry_attempts=3,
retry_interval=1,
)
response = agent.run("Tell me a joke.")
assert response is not None, "Retry mechanism test failed"
print("✓ Retry mechanism test passed")
def test_bulk_and_filtered_operations():
"""Test bulk operations and response filtering"""
print("\nTesting bulk and filtered operations...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Bulk-Filter-Test-Agent", llm=model, max_loops=1
)
# Test bulk run
bulk_tasks = [
{"task": "What is 2+2?"},
{"task": "Name a color"},
{"task": "Count to 3"},
]
bulk_responses = agent.bulk_run(bulk_tasks)
assert len(bulk_responses) == len(
bulk_tasks
), "Bulk run should return same number of responses as tasks"
# Test response filtering
agent.add_response_filter("color")
filtered_response = agent.filtered_run(
"What is your favorite color?"
)
assert (
"[FILTERED]" in filtered_response
), "Response filter not applied"
print("✓ Bulk and filtered operations test passed")
async def test_async_operations():
"""Test asynchronous operations"""
print("\nTesting async operations...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Async-Test-Agent", llm=model, max_loops=1
)
# Test single async run
response = await agent.arun("What is 1+1?")
assert response is not None, "Async run failed"
# Test concurrent async runs
tasks = ["Task 1", "Task 2", "Task 3"]
responses = await asyncio.gather(
*[agent.arun(task) for task in tasks]
)
assert len(responses) == len(
tasks
), "Not all async tasks completed"
print("✓ Async operations test passed")
def test_memory_and_state_persistence():
"""Test memory management and state persistence"""
print("\nTesting memory and state persistence...")
with tempfile.TemporaryDirectory() as temp_dir:
state_path = os.path.join(temp_dir, "test_state.json")
# Create agent with memory configuration
model = OpenAIChat(model_name="gpt-4.1")
agent1 = Agent(
agent_name="Memory-State-Test-Agent",
llm=model,
max_loops=1,
saved_state_path=state_path,
context_length=8192,
autosave=True,
)
# Test memory operations
agent1.add_memory("Important fact: The sky is blue")
agent1.memory_query("What color is the sky?")
# Save state
agent1.save()
# Create new agent and load state
agent2 = Agent(
agent_name="Memory-State-Test-Agent",
llm=model,
max_loops=1,
)
agent2.load(state_path)
# Verify memory persistence
memory_content = (
agent2.short_memory.return_history_as_string()
)
assert (
"sky is blue" in memory_content
), "Memory not properly persisted"
print("✓ Memory and state persistence test passed")
def test_sentiment_and_evaluation():
"""Test sentiment analysis and response evaluation"""
print("\nTesting sentiment analysis and evaluation...")
def mock_sentiment_analyzer(text):
"""Mock sentiment analyzer that returns a score between 0 and 1"""
return 0.7 if "positive" in text.lower() else 0.3
def mock_evaluator(response):
"""Mock evaluator that checks response quality"""
return "GOOD" if len(response) > 10 else "BAD"
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Sentiment-Eval-Test-Agent",
llm=model,
max_loops=1,
sentiment_analyzer=mock_sentiment_analyzer,
sentiment_threshold=0.5,
evaluator=mock_evaluator,
)
# Test sentiment analysis
agent.run("Generate a positive message")
# Test evaluation
agent.run("Generate a detailed response")
print("✓ Sentiment and evaluation test passed")
def test_tool_management():
"""Test tool management functionality"""
print("\nTesting tool management...")
def tool1(x: int) -> int:
"""Sample tool 1"""
return x * 2
def tool2(x: int) -> int:
"""Sample tool 2"""
return x + 2
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Tool-Test-Agent",
llm=model,
max_loops=1,
tools=[tool1],
)
# Test adding tools
agent.add_tool(tool2)
assert len(agent.tools) == 2, "Tool not added correctly"
# Test removing tools
agent.remove_tool(tool1)
assert len(agent.tools) == 1, "Tool not removed correctly"
# Test adding multiple tools
agent.add_tools([tool1, tool2])
assert len(agent.tools) == 3, "Multiple tools not added correctly"
print("✓ Tool management test passed")
def test_system_prompt_and_configuration():
"""Test system prompt and configuration updates"""
print("\nTesting system prompt and configuration...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Config-Test-Agent", llm=model, max_loops=1
)
# Test updating system prompt
new_prompt = "You are a helpful assistant."
agent.update_system_prompt(new_prompt)
assert (
agent.system_prompt == new_prompt
), "System prompt not updated"
# Test configuration updates
agent.update_max_loops(5)
assert agent.max_loops == 5, "Max loops not updated"
agent.update_loop_interval(2)
assert agent.loop_interval == 2, "Loop interval not updated"
# Test configuration export
config_dict = agent.to_dict()
assert isinstance(
config_dict, dict
), "Configuration export failed"
print("✓ System prompt and configuration test passed")
def test_agent_with_dynamic_temperature():
"""Test agent with dynamic temperature"""
print("\nTesting agent with dynamic temperature...")
model = OpenAIChat(model_name="gpt-4.1")
agent = Agent(
agent_name="Dynamic-Temp-Agent",
llm=model,
max_loops=2,
dynamic_temperature_enabled=True,
)
response = agent.run("Generate a creative story.")
assert response is not None, "Dynamic temperature test failed"
print("✓ Dynamic temperature test passed")
def run_all_tests():
"""Run all test functions"""
print("Starting Extended Agent functional tests...\n")
test_functions = [
test_basic_agent_functionality,
test_memory_management,
test_agent_output_formats,
test_agent_state_management,
test_agent_tools_and_execution,
test_agent_concurrent_execution,
test_agent_error_handling,
test_agent_configuration,
test_agent_with_stopping_condition,
test_agent_with_retry_mechanism,
test_agent_with_dynamic_temperature,
test_bulk_and_filtered_operations,
test_memory_and_state_persistence,
test_sentiment_and_evaluation,
test_tool_management,
test_system_prompt_and_configuration,
]
# Run synchronous tests
total_tests = len(test_functions) + 1 # +1 for async test
passed_tests = 0
for test in test_functions:
try:
test()
passed_tests += 1
except Exception as e:
print(f"✗ Test {test.__name__} failed: {str(e)}")
# Run async test
try:
asyncio.run(test_async_operations())
passed_tests += 1
except Exception as e:
print(f"✗ Async operations test failed: {str(e)}")
print("\nExtended Test Summary:")
print(f"Total Tests: {total_tests}")
print(f"Passed: {passed_tests}")
print(f"Failed: {total_tests - passed_tests}")
print(f"Success Rate: {(passed_tests/total_tests)*100:.2f}%")
if __name__ == "__main__":
run_all_tests()

@ -0,0 +1,381 @@
import pytest
from unittest.mock import Mock, patch
from swarms.structs.agent_router import AgentRouter
from swarms.structs.agent import Agent
@pytest.fixture
def test_agent():
"""Create a real agent for testing."""
with patch("swarms.structs.agent.LiteLLM") as mock_llm:
mock_llm.return_value.run.return_value = "Test response"
return Agent(
agent_name="test_agent",
agent_description="A test agent",
system_prompt="You are a test agent",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
)
def test_agent_router_initialization_default():
"""Test AgentRouter initialization with default parameters."""
with patch("swarms.structs.agent_router.embedding"):
router = AgentRouter()
assert router.embedding_model == "text-embedding-ada-002"
assert router.n_agents == 1
assert router.api_key is None
assert router.api_base is None
assert router.agents == []
assert router.agent_embeddings == []
assert router.agent_metadata == []
def test_agent_router_initialization_custom():
"""Test AgentRouter initialization with custom parameters."""
with patch("swarms.structs.agent_router.embedding"), patch(
"swarms.structs.agent.LiteLLM"
) as mock_llm:
mock_llm.return_value.run.return_value = "Test response"
agents = [
Agent(
agent_name="test1",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
),
Agent(
agent_name="test2",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
),
]
router = AgentRouter(
embedding_model="custom-model",
n_agents=3,
api_key="custom_key",
api_base="custom_base",
agents=agents,
)
assert router.embedding_model == "custom-model"
assert router.n_agents == 3
assert router.api_key == "custom_key"
assert router.api_base == "custom_base"
assert len(router.agents) == 2
def test_cosine_similarity_identical_vectors():
"""Test cosine similarity with identical vectors."""
router = AgentRouter()
vec1 = [1.0, 0.0, 0.0]
vec2 = [1.0, 0.0, 0.0]
result = router._cosine_similarity(vec1, vec2)
assert result == 1.0
def test_cosine_similarity_orthogonal_vectors():
"""Test cosine similarity with orthogonal vectors."""
router = AgentRouter()
vec1 = [1.0, 0.0, 0.0]
vec2 = [0.0, 1.0, 0.0]
result = router._cosine_similarity(vec1, vec2)
assert result == 0.0
def test_cosine_similarity_opposite_vectors():
"""Test cosine similarity with opposite vectors."""
router = AgentRouter()
vec1 = [1.0, 0.0, 0.0]
vec2 = [-1.0, 0.0, 0.0]
result = router._cosine_similarity(vec1, vec2)
assert result == -1.0
def test_cosine_similarity_different_lengths():
"""Test cosine similarity with vectors of different lengths."""
router = AgentRouter()
vec1 = [1.0, 0.0]
vec2 = [1.0, 0.0, 0.0]
with pytest.raises(
ValueError, match="Vectors must have the same length"
):
router._cosine_similarity(vec1, vec2)
@patch("swarms.structs.agent_router.embedding")
def test_generate_embedding_success(mock_embedding):
"""Test successful embedding generation."""
mock_embedding.return_value.data = [
Mock(embedding=[0.1, 0.2, 0.3, 0.4])
]
router = AgentRouter()
result = router._generate_embedding("test text")
assert result == [0.1, 0.2, 0.3, 0.4]
mock_embedding.assert_called_once()
@patch("swarms.structs.agent_router.embedding")
def test_generate_embedding_error(mock_embedding):
"""Test embedding generation error handling."""
mock_embedding.side_effect = Exception("API Error")
router = AgentRouter()
with pytest.raises(Exception, match="API Error"):
router._generate_embedding("test text")
@patch("swarms.structs.agent_router.embedding")
def test_add_agent_success(mock_embedding, test_agent):
"""Test successful agent addition."""
mock_embedding.return_value.data = [
Mock(embedding=[0.1, 0.2, 0.3])
]
router = AgentRouter()
router.add_agent(test_agent)
assert len(router.agents) == 1
assert len(router.agent_embeddings) == 1
assert len(router.agent_metadata) == 1
assert router.agents[0] == test_agent
assert router.agent_embeddings[0] == [0.1, 0.2, 0.3]
assert router.agent_metadata[0]["name"] == "test_agent"
@patch("swarms.structs.agent_router.embedding")
def test_add_agent_retry_error(mock_embedding, test_agent):
"""Test agent addition with retry mechanism failure."""
mock_embedding.side_effect = Exception("Embedding error")
router = AgentRouter()
# Should raise RetryError after retries are exhausted
with pytest.raises(Exception) as exc_info:
router.add_agent(test_agent)
# Check that it's a retry error or contains the original error
assert "Embedding error" in str(
exc_info.value
) or "RetryError" in str(exc_info.value)
@patch("swarms.structs.agent_router.embedding")
def test_add_agents_multiple(mock_embedding):
"""Test adding multiple agents."""
mock_embedding.return_value.data = [
Mock(embedding=[0.1, 0.2, 0.3])
]
with patch("swarms.structs.agent.LiteLLM") as mock_llm:
mock_llm.return_value.run.return_value = "Test response"
router = AgentRouter()
agents = [
Agent(
agent_name="agent1",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
),
Agent(
agent_name="agent2",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
),
Agent(
agent_name="agent3",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
),
]
router.add_agents(agents)
assert len(router.agents) == 3
assert len(router.agent_embeddings) == 3
assert len(router.agent_metadata) == 3
@patch("swarms.structs.agent_router.embedding")
def test_find_best_agent_success(mock_embedding):
"""Test successful best agent finding."""
# Mock embeddings for agents and task
mock_embedding.side_effect = [
Mock(data=[Mock(embedding=[0.1, 0.2, 0.3])]), # agent1
Mock(data=[Mock(embedding=[0.4, 0.5, 0.6])]), # agent2
Mock(data=[Mock(embedding=[0.7, 0.8, 0.9])]), # task
]
with patch("swarms.structs.agent.LiteLLM") as mock_llm:
mock_llm.return_value.run.return_value = "Test response"
router = AgentRouter()
agent1 = Agent(
agent_name="agent1",
agent_description="First agent",
system_prompt="Prompt 1",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
)
agent2 = Agent(
agent_name="agent2",
agent_description="Second agent",
system_prompt="Prompt 2",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
)
router.add_agent(agent1)
router.add_agent(agent2)
# Mock the similarity calculation to return predictable results
with patch.object(
router, "_cosine_similarity"
) as mock_similarity:
mock_similarity.side_effect = [
0.8,
0.6,
] # agent1 more similar
result = router.find_best_agent("test task")
assert result == agent1
def test_find_best_agent_no_agents():
"""Test finding best agent when no agents are available."""
with patch("swarms.structs.agent_router.embedding"):
router = AgentRouter()
result = router.find_best_agent("test task")
assert result is None
@patch("swarms.structs.agent_router.embedding")
def test_find_best_agent_retry_error(mock_embedding):
"""Test error handling in find_best_agent with retry mechanism."""
mock_embedding.side_effect = Exception("API Error")
with patch("swarms.structs.agent.LiteLLM") as mock_llm:
mock_llm.return_value.run.return_value = "Test response"
router = AgentRouter()
router.agents = [
Agent(
agent_name="agent1",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
print_on=False,
)
]
router.agent_embeddings = [[0.1, 0.2, 0.3]]
# Should raise RetryError after retries are exhausted
with pytest.raises(Exception) as exc_info:
router.find_best_agent("test task")
# Check that it's a retry error or contains the original error
assert "API Error" in str(
exc_info.value
) or "RetryError" in str(exc_info.value)
@patch("swarms.structs.agent_router.embedding")
def test_update_agent_history_success(mock_embedding, test_agent):
"""Test successful agent history update."""
mock_embedding.return_value.data = [
Mock(embedding=[0.1, 0.2, 0.3])
]
router = AgentRouter()
router.add_agent(test_agent)
# Update agent history
router.update_agent_history("test_agent")
# Verify the embedding was regenerated
assert (
mock_embedding.call_count == 2
) # Once for add, once for update
def test_update_agent_history_agent_not_found():
"""Test updating history for non-existent agent."""
with patch(
"swarms.structs.agent_router.embedding"
) as mock_embedding:
mock_embedding.return_value.data = [
Mock(embedding=[0.1, 0.2, 0.3])
]
router = AgentRouter()
# Should not raise an exception, just log a warning
router.update_agent_history("non_existent_agent")
@patch("swarms.structs.agent_router.embedding")
def test_agent_metadata_structure(mock_embedding, test_agent):
"""Test the structure of agent metadata."""
mock_embedding.return_value.data = [
Mock(embedding=[0.1, 0.2, 0.3])
]
router = AgentRouter()
router.add_agent(test_agent)
metadata = router.agent_metadata[0]
assert "name" in metadata
assert "text" in metadata
assert metadata["name"] == "test_agent"
assert (
"test_agent A test agent You are a test agent"
in metadata["text"]
)
def test_agent_router_edge_cases():
"""Test various edge cases."""
with patch(
"swarms.structs.agent_router.embedding"
) as mock_embedding:
mock_embedding.return_value.data = [
Mock(embedding=[0.1, 0.2, 0.3])
]
router = AgentRouter()
# Test with empty string task
result = router.find_best_agent("")
assert result is None
# Test with very long task description
long_task = "test " * 1000
result = router.find_best_agent(long_task)
assert result is None
if __name__ == "__main__":
pytest.main([__file__])

@ -1,328 +0,0 @@
import os
import traceback
from datetime import datetime
from typing import Callable, Dict, List, Optional
from loguru import logger
from swarm_models import OpenAIChat
from swarms.structs.agent import Agent
from swarms.structs.agent_rearrange import AgentRearrange
class TestResult:
"""Class to store test results and metadata"""
def __init__(self, test_name: str):
self.test_name = test_name
self.start_time = datetime.now()
self.end_time = None
self.success = False
self.error = None
self.traceback = None
self.function_output = None
def complete(
self, success: bool, error: Optional[Exception] = None
):
"""Complete the test execution with results"""
self.end_time = datetime.now()
self.success = success
if error:
self.error = str(error)
self.traceback = traceback.format_exc()
def duration(self) -> float:
"""Calculate test duration in seconds"""
if self.end_time:
return (self.end_time - self.start_time).total_seconds()
return 0
def run_test(test_func: Callable) -> TestResult:
"""
Decorator to run tests with error handling and logging
Args:
test_func (Callable): Test function to execute
Returns:
TestResult: Object containing test execution details
"""
def wrapper(*args, **kwargs) -> TestResult:
result = TestResult(test_func.__name__)
logger.info(
f"\n{'='*20} Running test: {test_func.__name__} {'='*20}"
)
try:
output = test_func(*args, **kwargs)
result.function_output = output
result.complete(success=True)
logger.success(
f"✅ Test {test_func.__name__} passed successfully"
)
except Exception as e:
result.complete(success=False, error=e)
logger.error(
f"❌ Test {test_func.__name__} failed with error: {str(e)}"
)
logger.error(f"Traceback: {traceback.format_exc()}")
logger.info(
f"Test duration: {result.duration():.2f} seconds\n"
)
return result
return wrapper
def create_functional_agents() -> List[Agent]:
"""
Create a list of functional agents with real LLM integration for testing.
Using OpenAI's GPT model for realistic agent behavior testing.
"""
# Initialize OpenAI Chat model
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
logger.warning(
"No OpenAI API key found. Using mock agents instead."
)
return [
create_mock_agent("TestAgent1"),
create_mock_agent("TestAgent2"),
]
try:
model = OpenAIChat(
api_key=api_key, model_name="gpt-4.1", temperature=0.1
)
# Create boss agent
boss_agent = Agent(
agent_name="BossAgent",
system_prompt="""
You are the BossAgent responsible for managing and overseeing test scenarios.
Your role is to coordinate tasks between agents and ensure efficient collaboration.
Analyze inputs, break down tasks, and provide clear directives to other agents.
Maintain a structured approach to task management and result compilation.
""",
llm=model,
max_loops=1,
dashboard=False,
streaming_on=True,
verbose=True,
stopping_token="<DONE>",
state_save_file_type="json",
saved_state_path="test_boss_agent.json",
)
# Create analysis agent
analysis_agent = Agent(
agent_name="AnalysisAgent",
system_prompt="""
You are the AnalysisAgent responsible for detailed data processing and analysis.
Your role is to examine input data, identify patterns, and provide analytical insights.
Focus on breaking down complex information into clear, actionable components.
""",
llm=model,
max_loops=1,
dashboard=False,
streaming_on=True,
verbose=True,
stopping_token="<DONE>",
state_save_file_type="json",
saved_state_path="test_analysis_agent.json",
)
# Create summary agent
summary_agent = Agent(
agent_name="SummaryAgent",
system_prompt="""
You are the SummaryAgent responsible for consolidating and summarizing information.
Your role is to take detailed analysis and create concise, actionable summaries.
Focus on highlighting key points and ensuring clarity in communication.
""",
llm=model,
max_loops=1,
dashboard=False,
streaming_on=True,
verbose=True,
stopping_token="<DONE>",
state_save_file_type="json",
saved_state_path="test_summary_agent.json",
)
logger.info(
"Successfully created functional agents with LLM integration"
)
return [boss_agent, analysis_agent, summary_agent]
except Exception as e:
logger.error(f"Failed to create functional agents: {str(e)}")
logger.warning("Falling back to mock agents")
return [
create_mock_agent("TestAgent1"),
create_mock_agent("TestAgent2"),
]
def create_mock_agent(name: str) -> Agent:
"""Create a mock agent for testing when LLM integration is not available"""
return Agent(
agent_name=name,
system_prompt=f"You are a test agent named {name}",
llm=None,
)
@run_test
def test_init():
"""Test AgentRearrange initialization with functional agents"""
logger.info("Creating agents for initialization test")
agents = create_functional_agents()
rearrange = AgentRearrange(
name="TestRearrange",
agents=agents,
flow=f"{agents[0].agent_name} -> {agents[1].agent_name} -> {agents[2].agent_name}",
)
assert rearrange.name == "TestRearrange"
assert len(rearrange.agents) == 3
assert (
rearrange.flow
== f"{agents[0].agent_name} -> {agents[1].agent_name} -> {agents[2].agent_name}"
)
logger.info(
f"Initialized AgentRearrange with {len(agents)} agents"
)
return True
@run_test
def test_validate_flow():
"""Test flow validation logic"""
agents = create_functional_agents()
rearrange = AgentRearrange(
agents=agents,
flow=f"{agents[0].agent_name} -> {agents[1].agent_name}",
)
logger.info("Testing valid flow pattern")
valid = rearrange.validate_flow()
assert valid is True
logger.info("Testing invalid flow pattern")
rearrange.flow = f"{agents[0].agent_name} {agents[1].agent_name}" # Missing arrow
try:
rearrange.validate_flow()
assert False, "Should have raised ValueError"
except ValueError as e:
logger.info(
f"Successfully caught invalid flow error: {str(e)}"
)
assert True
return True
@run_test
def test_add_remove_agent():
"""Test adding and removing agents from the swarm"""
agents = create_functional_agents()
rearrange = AgentRearrange(
agents=agents[:2]
) # Start with first two agents
logger.info("Testing agent addition")
new_agent = agents[2] # Use the third agent as new agent
rearrange.add_agent(new_agent)
assert new_agent.agent_name in rearrange.agents
logger.info("Testing agent removal")
rearrange.remove_agent(new_agent.agent_name)
assert new_agent.agent_name not in rearrange.agents
return True
@run_test
def test_basic_run():
"""Test basic task execution with the swarm"""
agents = create_functional_agents()
rearrange = AgentRearrange(
name="TestSwarm",
agents=agents,
flow=f"{agents[0].agent_name} -> {agents[1].agent_name} -> {agents[2].agent_name}",
max_loops=1,
)
test_task = (
"Analyze this test message and provide a brief summary."
)
logger.info(f"Running test task: {test_task}")
try:
result = rearrange.run(test_task)
assert result is not None
logger.info(
f"Successfully executed task with result length: {len(str(result))}"
)
return True
except Exception as e:
logger.error(f"Task execution failed: {str(e)}")
raise
def run_all_tests() -> Dict[str, TestResult]:
"""
Run all test cases and collect results
Returns:
Dict[str, TestResult]: Dictionary mapping test names to their results
"""
logger.info("\n🚀 Starting AgentRearrange test suite execution")
test_functions = [
test_init,
test_validate_flow,
test_add_remove_agent,
test_basic_run,
]
results = {}
for test in test_functions:
result = test()
results[test.__name__] = result
# Log summary
total_tests = len(results)
passed_tests = sum(1 for r in results.values() if r.success)
failed_tests = total_tests - passed_tests
logger.info("\n📊 Test Suite Summary:")
logger.info(f"Total Tests: {total_tests}")
print(f"✅ Passed: {passed_tests}")
if failed_tests > 0:
logger.error(f"❌ Failed: {failed_tests}")
# Detailed failure information
if failed_tests > 0:
logger.error("\n❌ Failed Tests Details:")
for name, result in results.items():
if not result.success:
logger.error(f"\n{name}:")
logger.error(f"Error: {result.error}")
logger.error(f"Traceback: {result.traceback}")
return results
if __name__ == "__main__":
print("🌟 Starting AgentRearrange Test Suite")
results = run_all_tests()
print("🏁 Test Suite Execution Completed")

@ -1,313 +0,0 @@
import time
from loguru import logger
from swarms import Agent
from experimental.airflow_swarm import (
AirflowDAGSwarm,
NodeType,
Conversation,
)
# Configure logger
logger.remove()
logger.add(lambda msg: print(msg, end=""), level="DEBUG")
def test_swarm_initialization():
"""Test basic swarm initialization and configuration."""
try:
swarm = AirflowDAGSwarm(
dag_id="test_dag",
name="Test DAG",
initial_message="Test message",
)
assert swarm.dag_id == "test_dag", "DAG ID not set correctly"
assert swarm.name == "Test DAG", "Name not set correctly"
assert (
len(swarm.nodes) == 0
), "Nodes should be empty on initialization"
assert (
len(swarm.edges) == 0
), "Edges should be empty on initialization"
# Test initial message
conv_json = swarm.get_conversation_history()
assert (
"Test message" in conv_json
), "Initial message not set correctly"
print("✅ Swarm initialization test passed")
return True
except AssertionError as e:
print(f"❌ Swarm initialization test failed: {str(e)}")
return False
def test_node_addition():
"""Test adding different types of nodes to the swarm."""
try:
swarm = AirflowDAGSwarm(dag_id="test_dag")
# Test adding an agent node
agent = Agent(
agent_name="Test-Agent",
system_prompt="Test prompt",
model_name="gpt-4o-mini",
max_loops=1,
)
agent_id = swarm.add_node(
"test_agent",
agent,
NodeType.AGENT,
query="Test query",
concurrent=True,
)
assert (
agent_id == "test_agent"
), "Agent node ID not returned correctly"
assert (
"test_agent" in swarm.nodes
), "Agent node not added to nodes dict"
# Test adding a callable node
def test_callable(x: int, conversation: Conversation) -> str:
return f"Test output {x}"
callable_id = swarm.add_node(
"test_callable",
test_callable,
NodeType.CALLABLE,
args=[42],
concurrent=False,
)
assert (
callable_id == "test_callable"
), "Callable node ID not returned correctly"
assert (
"test_callable" in swarm.nodes
), "Callable node not added to nodes dict"
print("✅ Node addition test passed")
return True
except AssertionError as e:
print(f"❌ Node addition test failed: {str(e)}")
return False
except Exception as e:
print(
f"❌ Node addition test failed with unexpected error: {str(e)}"
)
return False
def test_edge_addition():
"""Test adding edges between nodes."""
try:
swarm = AirflowDAGSwarm(dag_id="test_dag")
# Add two nodes
def node1_fn(conversation: Conversation) -> str:
return "Node 1 output"
def node2_fn(conversation: Conversation) -> str:
return "Node 2 output"
swarm.add_node("node1", node1_fn, NodeType.CALLABLE)
swarm.add_node("node2", node2_fn, NodeType.CALLABLE)
# Add edge between them
swarm.add_edge("node1", "node2")
assert (
"node2" in swarm.edges["node1"]
), "Edge not added correctly"
assert (
len(swarm.edges["node1"]) == 1
), "Incorrect number of edges"
# Test adding edge with non-existent node
try:
swarm.add_edge("node1", "non_existent")
assert (
False
), "Should raise ValueError for non-existent node"
except ValueError:
pass
print("✅ Edge addition test passed")
return True
except AssertionError as e:
print(f"❌ Edge addition test failed: {str(e)}")
return False
def test_execution_order():
"""Test that nodes are executed in the correct order based on dependencies."""
try:
swarm = AirflowDAGSwarm(dag_id="test_dag")
execution_order = []
def node1(conversation: Conversation) -> str:
execution_order.append("node1")
return "Node 1 output"
def node2(conversation: Conversation) -> str:
execution_order.append("node2")
return "Node 2 output"
def node3(conversation: Conversation) -> str:
execution_order.append("node3")
return "Node 3 output"
# Add nodes
swarm.add_node(
"node1", node1, NodeType.CALLABLE, concurrent=False
)
swarm.add_node(
"node2", node2, NodeType.CALLABLE, concurrent=False
)
swarm.add_node(
"node3", node3, NodeType.CALLABLE, concurrent=False
)
# Add edges to create a chain: node1 -> node2 -> node3
swarm.add_edge("node1", "node2")
swarm.add_edge("node2", "node3")
# Execute
swarm.run()
# Check execution order
assert execution_order == [
"node1",
"node2",
"node3",
], "Incorrect execution order"
print("✅ Execution order test passed")
return True
except AssertionError as e:
print(f"❌ Execution order test failed: {str(e)}")
return False
def test_concurrent_execution():
"""Test concurrent execution of nodes."""
try:
swarm = AirflowDAGSwarm(dag_id="test_dag")
def slow_node1(conversation: Conversation) -> str:
time.sleep(0.5)
return "Slow node 1 output"
def slow_node2(conversation: Conversation) -> str:
time.sleep(0.5)
return "Slow node 2 output"
# Add nodes with concurrent=True
swarm.add_node(
"slow1", slow_node1, NodeType.CALLABLE, concurrent=True
)
swarm.add_node(
"slow2", slow_node2, NodeType.CALLABLE, concurrent=True
)
# Measure execution time
start_time = time.time()
swarm.run()
execution_time = time.time() - start_time
# Should take ~0.5s for concurrent execution, not ~1s
assert (
execution_time < 0.8
), "Concurrent execution took too long"
print("✅ Concurrent execution test passed")
return True
except AssertionError as e:
print(f"❌ Concurrent execution test failed: {str(e)}")
return False
def test_conversation_handling():
"""Test conversation management within the swarm."""
try:
swarm = AirflowDAGSwarm(
dag_id="test_dag", initial_message="Initial test message"
)
# Test adding user messages
swarm.add_user_message("Test message 1")
swarm.add_user_message("Test message 2")
history = swarm.get_conversation_history()
assert (
"Initial test message" in history
), "Initial message not in history"
assert (
"Test message 1" in history
), "First message not in history"
assert (
"Test message 2" in history
), "Second message not in history"
print("✅ Conversation handling test passed")
return True
except AssertionError as e:
print(f"❌ Conversation handling test failed: {str(e)}")
return False
def test_error_handling():
"""Test error handling in node execution."""
try:
swarm = AirflowDAGSwarm(dag_id="test_dag")
def failing_node(conversation: Conversation) -> str:
raise ValueError("Test error")
swarm.add_node("failing", failing_node, NodeType.CALLABLE)
# Execute should not raise an exception
result = swarm.run()
assert (
"Error" in result
), "Error not captured in execution result"
assert (
"Test error" in result
), "Specific error message not captured"
print("✅ Error handling test passed")
return True
except Exception as e:
print(f"❌ Error handling test failed: {str(e)}")
return False
def run_all_tests():
"""Run all test functions and report results."""
tests = [
test_swarm_initialization,
test_node_addition,
test_edge_addition,
test_execution_order,
test_concurrent_execution,
test_conversation_handling,
test_error_handling,
]
results = []
for test in tests:
print(f"\nRunning {test.__name__}...")
result = test()
results.append(result)
total = len(results)
passed = sum(results)
print("\n=== Test Results ===")
print(f"Total tests: {total}")
print(f"Passed: {passed}")
print(f"Failed: {total - passed}")
print("==================")
if __name__ == "__main__":
run_all_tests()

File diff suppressed because it is too large

@ -1,293 +0,0 @@
"""
Tests for bug #1115 fix in AutoSwarmBuilder.
This test module verifies the fix for AttributeError when creating agents
from AgentSpec Pydantic models in AutoSwarmBuilder.
Bug: https://github.com/kyegomez/swarms/issues/1115
"""
import pytest
from swarms.structs.agent import Agent
from swarms.structs.auto_swarm_builder import (
AgentSpec,
AutoSwarmBuilder,
)
from swarms.structs.ma_utils import set_random_models_for_agents
class TestAutoSwarmBuilderFix:
"""Tests for bug #1115 fix in AutoSwarmBuilder."""
def test_create_agents_from_specs_with_dict(self):
"""Test that create_agents_from_specs handles dict input correctly."""
builder = AutoSwarmBuilder()
# Create specs as a dictionary
specs = {
"agents": [
{
"agent_name": "test_agent_1",
"description": "Test agent 1 description",
"system_prompt": "You are a helpful assistant",
"model_name": "gpt-4o-mini",
"max_loops": 1,
}
]
}
agents = builder.create_agents_from_specs(specs)
# Verify agents were created correctly
assert len(agents) == 1
assert isinstance(agents[0], Agent)
assert agents[0].agent_name == "test_agent_1"
# Verify description was mapped to agent_description
assert hasattr(agents[0], "agent_description")
assert (
agents[0].agent_description == "Test agent 1 description"
)
def test_create_agents_from_specs_with_pydantic(self):
"""Test that create_agents_from_specs handles Pydantic model input correctly.
This is the main test for bug #1115 - it verifies that AgentSpec
Pydantic models can be unpacked correctly.
"""
builder = AutoSwarmBuilder()
# Create specs as Pydantic AgentSpec objects
agent_spec = AgentSpec(
agent_name="test_agent_pydantic",
description="Pydantic test agent",
system_prompt="You are a helpful assistant",
model_name="gpt-4o-mini",
max_loops=1,
)
specs = {"agents": [agent_spec]}
agents = builder.create_agents_from_specs(specs)
# Verify agents were created correctly
assert len(agents) == 1
assert isinstance(agents[0], Agent)
assert agents[0].agent_name == "test_agent_pydantic"
# Verify description was mapped to agent_description
assert hasattr(agents[0], "agent_description")
assert agents[0].agent_description == "Pydantic test agent"
def test_parameter_name_mapping(self):
"""Test that 'description' field maps to 'agent_description' correctly."""
builder = AutoSwarmBuilder()
# Test with dict that has 'description'
specs = {
"agents": [
{
"agent_name": "mapping_test",
"description": "This should map to agent_description",
"system_prompt": "You are helpful",
}
]
}
agents = builder.create_agents_from_specs(specs)
assert len(agents) == 1
agent = agents[0]
# Verify description was mapped
assert hasattr(agent, "agent_description")
assert (
agent.agent_description
== "This should map to agent_description"
)
def test_create_agents_from_specs_mixed_input(self):
"""Test that create_agents_from_specs handles mixed dict and Pydantic input."""
builder = AutoSwarmBuilder()
# Mix of dict and Pydantic objects
dict_spec = {
"agent_name": "dict_agent",
"description": "Dict agent description",
"system_prompt": "You are helpful",
}
pydantic_spec = AgentSpec(
agent_name="pydantic_agent",
description="Pydantic agent description",
system_prompt="You are smart",
)
specs = {"agents": [dict_spec, pydantic_spec]}
agents = builder.create_agents_from_specs(specs)
# Verify both agents were created
assert len(agents) == 2
assert all(isinstance(agent, Agent) for agent in agents)
# Verify both have correct descriptions
dict_agent = next(
a for a in agents if a.agent_name == "dict_agent"
)
pydantic_agent = next(
a for a in agents if a.agent_name == "pydantic_agent"
)
assert (
dict_agent.agent_description == "Dict agent description"
)
assert (
pydantic_agent.agent_description
== "Pydantic agent description"
)
def test_set_random_models_for_agents_with_valid_agents(
self,
):
"""Test set_random_models_for_agents with proper Agent objects."""
# Create proper Agent objects
agents = [
Agent(
agent_name="agent1",
system_prompt="You are agent 1",
max_loops=1,
),
Agent(
agent_name="agent2",
system_prompt="You are agent 2",
max_loops=1,
),
]
# Set random models
model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
result = set_random_models_for_agents(
agents=agents, model_names=model_names
)
# Verify results
assert len(result) == 2
assert all(isinstance(agent, Agent) for agent in result)
assert all(hasattr(agent, "model_name") for agent in result)
assert all(
agent.model_name in model_names for agent in result
)
def test_set_random_models_for_agents_with_single_agent(
self,
):
"""Test set_random_models_for_agents with a single agent."""
agent = Agent(
agent_name="single_agent",
system_prompt="You are helpful",
max_loops=1,
)
model_names = ["gpt-4o-mini", "gpt-4o"]
result = set_random_models_for_agents(
agents=agent, model_names=model_names
)
assert isinstance(result, Agent)
assert hasattr(result, "model_name")
assert result.model_name in model_names
def test_set_random_models_for_agents_with_none(self):
"""Test set_random_models_for_agents with None returns random model name."""
model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
result = set_random_models_for_agents(
agents=None, model_names=model_names
)
assert isinstance(result, str)
assert result in model_names
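    # Behavior exercised by the three tests above (a summary derived from the assertions, not
    # an extra guarantee): a list of Agents returns a list with model_name set on each, a
    # single Agent returns that Agent with model_name set, and None returns a random
    # model-name string drawn from model_names.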
@pytest.mark.skip(
reason="This test requires API key and makes LLM calls"
)
def test_auto_swarm_builder_return_agents_objects_integration(
self,
):
"""Integration test for AutoSwarmBuilder with execution_type='return-agents-objects'.
This test requires OPENAI_API_KEY and makes actual LLM calls.
Run manually with: pytest -k test_auto_swarm_builder_return_agents_objects_integration -v
"""
builder = AutoSwarmBuilder(
execution_type="return-agents-objects",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
)
agents = builder.run(
"Create a team of 2 data analysis agents with specific roles"
)
# Verify agents were created
assert isinstance(agents, list)
assert len(agents) >= 1
assert all(isinstance(agent, Agent) for agent in agents)
assert all(hasattr(agent, "agent_name") for agent in agents)
assert all(
hasattr(agent, "agent_description") for agent in agents
)
def test_agent_spec_to_agent_all_fields(self):
"""Test that all AgentSpec fields are properly passed to Agent."""
builder = AutoSwarmBuilder()
agent_spec = AgentSpec(
agent_name="full_test_agent",
description="Full test description",
system_prompt="You are a comprehensive test agent",
model_name="gpt-4o-mini",
auto_generate_prompt=False,
max_tokens=4096,
temperature=0.7,
role="worker",
max_loops=3,
goal="Test all parameters",
)
agents = builder.create_agents_from_specs(
{"agents": [agent_spec]}
)
assert len(agents) == 1
agent = agents[0]
# Verify all fields were set
assert agent.agent_name == "full_test_agent"
assert agent.agent_description == "Full test description"
# Agent may modify system_prompt by adding additional instructions
assert (
"You are a comprehensive test agent"
in agent.system_prompt
)
assert agent.max_loops == 3
assert agent.max_tokens == 4096
assert agent.temperature == 0.7
def test_create_agents_from_specs_empty_list(self):
"""Test that create_agents_from_specs handles empty agent list."""
builder = AutoSwarmBuilder()
specs = {"agents": []}
agents = builder.create_agents_from_specs(specs)
assert isinstance(agents, list)
assert len(agents) == 0
if __name__ == "__main__":
# Run tests with pytest
pytest.main([__file__, "-v", "--tb=short"])

@ -1,10 +1,18 @@
import pytest
from dotenv import load_dotenv
from swarms.structs.agent import Agent
from swarms.structs.auto_swarm_builder import (
AgentSpec,
AutoSwarmBuilder,
)
from swarms.structs.ma_utils import set_random_models_for_agents
load_dotenv()
def print_separator():
"""Print a separator line for test output formatting."""
print("\n" + "=" * 50)
@ -194,5 +202,273 @@ def run_all_tests():
raise
# Bug Fix Tests (from test_auto_swarm_builder_fix.py)
class TestAutoSwarmBuilderFix:
"""Tests for bug #1115 fix in AutoSwarmBuilder."""
def test_create_agents_from_specs_with_dict(self):
"""Test that create_agents_from_specs handles dict input correctly."""
builder = AutoSwarmBuilder()
# Create specs as a dictionary
specs = {
"agents": [
{
"agent_name": "test_agent_1",
"description": "Test agent 1 description",
"system_prompt": "You are a helpful assistant",
"model_name": "gpt-4o-mini",
"max_loops": 1,
}
]
}
agents = builder.create_agents_from_specs(specs)
# Verify agents were created correctly
assert len(agents) == 1
assert isinstance(agents[0], Agent)
assert agents[0].agent_name == "test_agent_1"
# Verify description was mapped to agent_description
assert hasattr(agents[0], "agent_description")
assert (
agents[0].agent_description == "Test agent 1 description"
)
def test_create_agents_from_specs_with_pydantic(self):
"""Test that create_agents_from_specs handles Pydantic model input correctly.
This is the main test for bug #1115 - it verifies that AgentSpec
Pydantic models can be unpacked correctly.
"""
builder = AutoSwarmBuilder()
# Create specs as Pydantic AgentSpec objects
agent_spec = AgentSpec(
agent_name="test_agent_pydantic",
description="Pydantic test agent",
system_prompt="You are a helpful assistant",
model_name="gpt-4o-mini",
max_loops=1,
)
specs = {"agents": [agent_spec]}
agents = builder.create_agents_from_specs(specs)
# Verify agents were created correctly
assert len(agents) == 1
assert isinstance(agents[0], Agent)
assert agents[0].agent_name == "test_agent_pydantic"
# Verify description was mapped to agent_description
assert hasattr(agents[0], "agent_description")
assert agents[0].agent_description == "Pydantic test agent"
def test_parameter_name_mapping(self):
"""Test that 'description' field maps to 'agent_description' correctly."""
builder = AutoSwarmBuilder()
# Test with dict that has 'description'
specs = {
"agents": [
{
"agent_name": "mapping_test",
"description": "This should map to agent_description",
"system_prompt": "You are helpful",
}
]
}
agents = builder.create_agents_from_specs(specs)
assert len(agents) == 1
agent = agents[0]
# Verify description was mapped
assert hasattr(agent, "agent_description")
assert (
agent.agent_description
== "This should map to agent_description"
)
def test_create_agents_from_specs_mixed_input(self):
"""Test that create_agents_from_specs handles mixed dict and Pydantic input."""
builder = AutoSwarmBuilder()
# Mix of dict and Pydantic objects
dict_spec = {
"agent_name": "dict_agent",
"description": "Dict agent description",
"system_prompt": "You are helpful",
}
pydantic_spec = AgentSpec(
agent_name="pydantic_agent",
description="Pydantic agent description",
system_prompt="You are smart",
)
specs = {"agents": [dict_spec, pydantic_spec]}
agents = builder.create_agents_from_specs(specs)
# Verify both agents were created
assert len(agents) == 2
assert all(isinstance(agent, Agent) for agent in agents)
# Verify both have correct descriptions
dict_agent = next(
a for a in agents if a.agent_name == "dict_agent"
)
pydantic_agent = next(
a for a in agents if a.agent_name == "pydantic_agent"
)
assert (
dict_agent.agent_description == "Dict agent description"
)
assert (
pydantic_agent.agent_description
== "Pydantic agent description"
)
def test_set_random_models_for_agents_with_valid_agents(self):
"""Test set_random_models_for_agents with proper Agent objects."""
# Create proper Agent objects
agents = [
Agent(
agent_name="agent1",
system_prompt="You are agent 1",
max_loops=1,
),
Agent(
agent_name="agent2",
system_prompt="You are agent 2",
max_loops=1,
),
]
# Set random models
model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
result = set_random_models_for_agents(
agents=agents, model_names=model_names
)
# Verify results
assert len(result) == 2
assert all(isinstance(agent, Agent) for agent in result)
assert all(hasattr(agent, "model_name") for agent in result)
assert all(
agent.model_name in model_names for agent in result
)
def test_set_random_models_for_agents_with_single_agent(self):
"""Test set_random_models_for_agents with a single agent."""
agent = Agent(
agent_name="single_agent",
system_prompt="You are helpful",
max_loops=1,
)
model_names = ["gpt-4o-mini", "gpt-4o"]
result = set_random_models_for_agents(
agents=agent, model_names=model_names
)
assert isinstance(result, Agent)
assert hasattr(result, "model_name")
assert result.model_name in model_names
def test_set_random_models_for_agents_with_none(self):
"""Test set_random_models_for_agents with None returns random model name."""
model_names = ["gpt-4o-mini", "gpt-4o", "claude-3-5-sonnet"]
result = set_random_models_for_agents(
agents=None, model_names=model_names
)
assert isinstance(result, str)
assert result in model_names
@pytest.mark.skip(
reason="This test requires API key and makes LLM calls"
)
def test_auto_swarm_builder_return_agents_objects_integration(
self,
):
"""Integration test for AutoSwarmBuilder with execution_type='return-agents-objects'.
This test requires OPENAI_API_KEY and makes actual LLM calls.
Run manually with: pytest -k test_auto_swarm_builder_return_agents_objects_integration -v
"""
builder = AutoSwarmBuilder(
execution_type="return-agents-objects",
model_name="gpt-4o-mini",
max_loops=1,
verbose=False,
)
agents = builder.run(
"Create a team of 2 data analysis agents with specific roles"
)
# Verify agents were created
assert isinstance(agents, list)
assert len(agents) >= 1
assert all(isinstance(agent, Agent) for agent in agents)
assert all(hasattr(agent, "agent_name") for agent in agents)
assert all(
hasattr(agent, "agent_description") for agent in agents
)
def test_agent_spec_to_agent_all_fields(self):
"""Test that all AgentSpec fields are properly passed to Agent."""
builder = AutoSwarmBuilder()
agent_spec = AgentSpec(
agent_name="full_test_agent",
description="Full test description",
system_prompt="You are a comprehensive test agent",
model_name="gpt-4o-mini",
auto_generate_prompt=False,
max_tokens=4096,
temperature=0.7,
role="worker",
max_loops=3,
goal="Test all parameters",
)
agents = builder.create_agents_from_specs(
{"agents": [agent_spec]}
)
assert len(agents) == 1
agent = agents[0]
# Verify all fields were set
assert agent.agent_name == "full_test_agent"
assert agent.agent_description == "Full test description"
# Agent may modify system_prompt by adding additional instructions
assert (
"You are a comprehensive test agent"
in agent.system_prompt
)
assert agent.max_loops == 3
assert agent.max_tokens == 4096
assert agent.temperature == 0.7
def test_create_agents_from_specs_empty_list(self):
"""Test that create_agents_from_specs handles empty agent list."""
builder = AutoSwarmBuilder()
specs = {"agents": []}
agents = builder.create_agents_from_specs(specs)
assert isinstance(agents, list)
assert len(agents) == 0
if __name__ == "__main__":
run_all_tests()

@ -1,287 +0,0 @@
import os
from datetime import datetime
import pytest
from swarms.structs.base_structure import BaseStructure
class TestBaseStructure:
def test_init(self):
base_structure = BaseStructure(
name="TestStructure",
description="Test description",
save_metadata=True,
save_artifact_path="./test_artifacts",
save_metadata_path="./test_metadata",
save_error_path="./test_errors",
)
assert base_structure.name == "TestStructure"
assert base_structure.description == "Test description"
assert base_structure.save_metadata is True
assert base_structure.save_artifact_path == "./test_artifacts"
assert base_structure.save_metadata_path == "./test_metadata"
assert base_structure.save_error_path == "./test_errors"
def test_save_to_file_and_load_from_file(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
file_path = os.path.join(tmp_dir, "test_file.json")
data_to_save = {"key": "value"}
base_structure = BaseStructure()
base_structure.save_to_file(data_to_save, file_path)
loaded_data = base_structure.load_from_file(file_path)
assert loaded_data == data_to_save
def test_save_metadata_and_load_metadata(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_metadata_path=tmp_dir)
metadata = {"name": "Test", "description": "Test metadata"}
base_structure.save_metadata(metadata)
loaded_metadata = base_structure.load_metadata()
assert loaded_metadata == metadata
def test_log_error(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_error_path=tmp_dir)
error_message = "Test error message"
base_structure.log_error(error_message)
log_file = os.path.join(tmp_dir, "TestStructure_errors.log")
with open(log_file) as file:
lines = file.readlines()
assert len(lines) == 1
assert lines[0] == f"{error_message}\n"
def test_save_artifact_and_load_artifact(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_artifact_path=tmp_dir)
artifact = {"key": "value"}
artifact_name = "test_artifact"
base_structure.save_artifact(artifact, artifact_name)
loaded_artifact = base_structure.load_artifact(artifact_name)
assert loaded_artifact == artifact
def test_current_timestamp(self):
base_structure = BaseStructure()
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
timestamp = base_structure._current_timestamp()
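        # Note: this equality assumes both timestamps are captured within the same second;
        # a call that straddles a second boundary can make the assertion flaky.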
assert timestamp == current_time
def test_log_event(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_metadata_path=tmp_dir)
event = "Test event"
event_type = "INFO"
base_structure.log_event(event, event_type)
log_file = os.path.join(tmp_dir, "TestStructure_events.log")
with open(log_file) as file:
lines = file.readlines()
assert len(lines) == 1
assert (
lines[0] == f"[{base_structure._current_timestamp()}]"
f" [{event_type}] {event}\n"
)
@pytest.mark.asyncio
async def test_run_async(self):
base_structure = BaseStructure()
async def async_function():
return "Async Test Result"
result = await base_structure.run_async(async_function)
assert result == "Async Test Result"
@pytest.mark.asyncio
async def test_save_metadata_async(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_metadata_path=tmp_dir)
metadata = {"name": "Test", "description": "Test metadata"}
await base_structure.save_metadata_async(metadata)
loaded_metadata = base_structure.load_metadata()
assert loaded_metadata == metadata
@pytest.mark.asyncio
async def test_log_error_async(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_error_path=tmp_dir)
error_message = "Test error message"
await base_structure.log_error_async(error_message)
log_file = os.path.join(tmp_dir, "TestStructure_errors.log")
with open(log_file) as file:
lines = file.readlines()
assert len(lines) == 1
assert lines[0] == f"{error_message}\n"
@pytest.mark.asyncio
async def test_save_artifact_async(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_artifact_path=tmp_dir)
artifact = {"key": "value"}
artifact_name = "test_artifact"
await base_structure.save_artifact_async(
artifact, artifact_name
)
loaded_artifact = base_structure.load_artifact(artifact_name)
assert loaded_artifact == artifact
@pytest.mark.asyncio
async def test_load_artifact_async(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_artifact_path=tmp_dir)
artifact = {"key": "value"}
artifact_name = "test_artifact"
base_structure.save_artifact(artifact, artifact_name)
loaded_artifact = await base_structure.load_artifact_async(
artifact_name
)
assert loaded_artifact == artifact
@pytest.mark.asyncio
async def test_log_event_async(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure(save_metadata_path=tmp_dir)
event = "Test event"
event_type = "INFO"
await base_structure.log_event_async(event, event_type)
log_file = os.path.join(tmp_dir, "TestStructure_events.log")
with open(log_file) as file:
lines = file.readlines()
assert len(lines) == 1
assert (
lines[0] == f"[{base_structure._current_timestamp()}]"
f" [{event_type}] {event}\n"
)
@pytest.mark.asyncio
async def test_asave_to_file(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
file_path = os.path.join(tmp_dir, "test_file.json")
data_to_save = {"key": "value"}
base_structure = BaseStructure()
await base_structure.asave_to_file(data_to_save, file_path)
loaded_data = base_structure.load_from_file(file_path)
assert loaded_data == data_to_save
@pytest.mark.asyncio
async def test_aload_from_file(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
file_path = os.path.join(tmp_dir, "test_file.json")
data_to_save = {"key": "value"}
base_structure = BaseStructure()
base_structure.save_to_file(data_to_save, file_path)
loaded_data = await base_structure.aload_from_file(file_path)
assert loaded_data == data_to_save
def test_run_in_thread(self):
base_structure = BaseStructure()
result = base_structure.run_in_thread(
lambda: "Thread Test Result"
)
assert result.result() == "Thread Test Result"
def test_save_and_decompress_data(self):
base_structure = BaseStructure()
data = {"key": "value"}
compressed_data = base_structure.compress_data(data)
decompressed_data = base_structure.decompres_data(
compressed_data
)
assert decompressed_data == data
def test_run_batched(self):
base_structure = BaseStructure()
def run_function(data):
return f"Processed {data}"
batched_data = list(range(10))
result = base_structure.run_batched(
batched_data, batch_size=5, func=run_function
)
expected_result = [
f"Processed {data}" for data in batched_data
]
assert result == expected_result
def test_load_config(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
config_file = os.path.join(tmp_dir, "config.json")
config_data = {"key": "value"}
base_structure = BaseStructure()
base_structure.save_to_file(config_data, config_file)
loaded_config = base_structure.load_config(config_file)
assert loaded_config == config_data
def test_backup_data(self, tmpdir):
tmp_dir = tmpdir.mkdir("test_dir")
base_structure = BaseStructure()
data_to_backup = {"key": "value"}
base_structure.backup_data(
data_to_backup, backup_path=tmp_dir
)
backup_files = os.listdir(tmp_dir)
assert len(backup_files) == 1
loaded_data = base_structure.load_from_file(
os.path.join(tmp_dir, backup_files[0])
)
assert loaded_data == data_to_backup
def test_monitor_resources(self):
base_structure = BaseStructure()
base_structure.monitor_resources()
def test_run_with_resources(self):
base_structure = BaseStructure()
def run_function():
base_structure.monitor_resources()
return "Resource Test Result"
result = base_structure.run_with_resources(run_function)
assert result == "Resource Test Result"
def test_run_with_resources_batched(self):
base_structure = BaseStructure()
def run_function(data):
base_structure.monitor_resources()
return f"Processed {data}"
batched_data = list(range(10))
result = base_structure.run_with_resources_batched(
batched_data, batch_size=5, func=run_function
)
expected_result = [
f"Processed {data}" for data in batched_data
]
assert result == expected_result

@ -1,67 +0,0 @@
import json
import os
import pytest
from dotenv import load_dotenv
from swarm_models import OpenAIChat
from swarms.structs import BaseWorkflow
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
def setup_workflow():
llm = OpenAIChat(openai_api_key=api_key)
workflow = BaseWorkflow(max_loops=1)
workflow.add("What's the weather in miami", llm)
workflow.add("Create a report on these metrics", llm)
workflow.save_workflow_state("workflow_state.json")
return workflow
def teardown_workflow():
os.remove("workflow_state.json")
def test_load_workflow_state():
workflow = setup_workflow()
workflow.load_workflow_state("workflow_state.json")
assert workflow.max_loops == 1
assert len(workflow.tasks) == 2
assert (
workflow.tasks[0].description == "What's the weather in miami"
)
assert (
workflow.tasks[1].description
== "Create a report on these metrics"
)
teardown_workflow()
def test_load_workflow_state_with_missing_file():
workflow = setup_workflow()
with pytest.raises(FileNotFoundError):
workflow.load_workflow_state("non_existent_file.json")
teardown_workflow()
def test_load_workflow_state_with_invalid_file():
workflow = setup_workflow()
with open("invalid_file.json", "w") as f:
f.write("This is not valid JSON")
with pytest.raises(json.JSONDecodeError):
workflow.load_workflow_state("invalid_file.json")
os.remove("invalid_file.json")
teardown_workflow()
def test_load_workflow_state_with_missing_keys():
workflow = setup_workflow()
with open("missing_keys.json", "w") as f:
json.dump({"max_loops": 1}, f)
with pytest.raises(KeyError):
workflow.load_workflow_state("missing_keys.json")
os.remove("missing_keys.json")
teardown_workflow()
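# A hedged alternative sketch (not how this file is written): the manual setup_workflow()/
# teardown_workflow() pair could be expressed as a pytest fixture so the state file is
# cleaned up even when an assertion fails, e.g.:
#
#   @pytest.fixture
#   def workflow(tmp_path):
#       llm = OpenAIChat(openai_api_key=api_key)
#       wf = BaseWorkflow(max_loops=1)
#       wf.add("What's the weather in miami", llm)
#       wf.save_workflow_state(str(tmp_path / "workflow_state.json"))
#       yield wf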

File diff suppressed because it is too large

@ -2,129 +2,344 @@ from swarms import Agent
from swarms.structs.concurrent_workflow import ConcurrentWorkflow
def test_concurrent_workflow_basic_execution():
    """Test basic ConcurrentWorkflow execution with multiple agents"""
    # Create specialized agents for different perspectives
    research_agent = Agent(
        agent_name="Research-Analyst",
        agent_description="Agent specializing in research and data collection",
        model_name="gpt-4o",
        max_loops=1,
    )
    strategy_agent = Agent(
        agent_name="Strategy-Consultant",
        agent_description="Agent specializing in strategic planning and analysis",
        model_name="gpt-4o",
        max_loops=1,
    )
    risk_agent = Agent(
        agent_name="Risk-Assessment-Specialist",
        agent_description="Agent specializing in risk analysis and mitigation",
        model_name="gpt-4o",
        max_loops=1,
    )
    # Create workflow with multiple agents
    workflow = ConcurrentWorkflow(
        name="Multi-Perspective-Analysis-Workflow",
        description="Concurrent analysis from research, strategy, and risk perspectives",
        agents=[research_agent, strategy_agent, risk_agent],
        max_loops=1,
    )
    result = workflow.run(
        "Analyze the potential impact of quantum computing on cybersecurity"
    )
    # Verify results - ConcurrentWorkflow returns a list of dictionaries
    assert result is not None
    assert isinstance(result, list)
    assert len(result) == 3
    for r in result:
        assert isinstance(r, dict)
        assert "agent" in r
        assert "output" in r
        # Output might be None or empty string, just check it exists
def test_concurrent_workflow_with_dashboard():
    """Test ConcurrentWorkflow with dashboard visualization"""
    # Create agents with different expertise
    market_agent = Agent(
        agent_name="Market-Analyst",
        agent_description="Agent for market analysis and trends",
        model_name="gpt-4o",
        max_loops=1,
    )
    financial_agent = Agent(
        agent_name="Financial-Expert",
        agent_description="Agent for financial analysis and forecasting",
        model_name="gpt-4o",
        max_loops=1,
    )
    technology_agent = Agent(
        agent_name="Technology-Specialist",
        agent_description="Agent for technology assessment and innovation",
        model_name="gpt-4o",
        max_loops=1,
    )
    workflow = ConcurrentWorkflow(
        name="Dashboard-Analysis-Workflow",
        description="Concurrent analysis with real-time dashboard monitoring",
        agents=[market_agent, financial_agent, technology_agent],
        max_loops=1,
        show_dashboard=True,
    )
    result = workflow.run(
        "Evaluate investment opportunities in renewable energy sector"
    )
    assert result is not None
    assert isinstance(result, list)
    assert len(result) == 3
    for r in result:
        assert isinstance(r, dict)
        assert "agent" in r
        assert "output" in r
        # Output can be None or empty, just check structure
def test_concurrent_workflow_batched_execution():
    """Test batched execution of multiple tasks"""
    # Create agents for comprehensive analysis
    agents = [
        Agent(
            agent_name=f"Analysis-Agent-{i+1}",
            agent_description=f"Agent {i+1} for comprehensive business analysis",
            model_name="gpt-4o",
            max_loops=1,
        )
        for i in range(4)
    ]
    workflow = ConcurrentWorkflow(
        name="Batched-Analysis-Workflow",
        description="Workflow for processing multiple analysis tasks",
        agents=agents,
        max_loops=1,
    )
    # Test batched execution
    tasks = [
        "Analyze market trends in AI adoption",
        "Evaluate competitive landscape in cloud computing",
        "Assess regulatory impacts on fintech",
        "Review supply chain vulnerabilities in manufacturing",
    ]
    results = workflow.batch_run(tasks)
    assert results is not None
    assert isinstance(results, list)
    assert len(results) == 4
    # Each result should be a list of agent outputs
    for result in results:
        assert result is not None
        assert isinstance(result, list)
def test_concurrent_workflow_error_handling():
    """Test ConcurrentWorkflow error handling and validation"""
    # Test with empty agents list
    try:
        workflow = ConcurrentWorkflow(agents=[])
        assert (
            False
        ), "Should have raised ValueError for empty agents list"
    except ValueError as e:
        assert "No agents provided" in str(e)
    # Test with None agents
    try:
        workflow = ConcurrentWorkflow(agents=None)
        assert False, "Should have raised ValueError for None agents"
    except ValueError as e:
        assert "No agents provided" in str(e)
        assert str(e) != ""  # Verify we got an error message
def test_concurrent_workflow_max_loops_configuration():
    """Test ConcurrentWorkflow max_loops configuration"""
    agent1 = Agent(
        agent_name="Loop-Test-Agent-1",
        agent_description="First agent for loop testing",
        model_name="gpt-4o",
        max_loops=2,
    )
    agent2 = Agent(
        agent_name="Loop-Test-Agent-2",
        agent_description="Second agent for loop testing",
        model_name="gpt-4o",
        max_loops=3,
    )
    workflow = ConcurrentWorkflow(
        name="Loop-Configuration-Test",
        description="Testing max_loops configuration",
        agents=[agent1, agent2],
        max_loops=1,  # This should override agent's max_loops
    )
    result = workflow.run("Test workflow loop configuration")
    assert result is not None
    assert isinstance(result, list)
    assert len(result) == 2
    for r in result:
        assert isinstance(r, dict)
        assert "agent" in r
        assert "output" in r
def test_concurrent_workflow_different_output_types():
"""Test ConcurrentWorkflow with different output types"""
# Create agents with diverse perspectives
technical_agent = Agent(
agent_name="Technical-Analyst",
agent_description="Agent for technical analysis",
model_name="gpt-4o",
max_loops=1,
)
business_agent = Agent(
agent_name="Business-Strategist",
agent_description="Agent for business strategy",
model_name="gpt-4o",
max_loops=1,
)
legal_agent = Agent(
agent_name="Legal-Expert",
agent_description="Agent for legal compliance analysis",
model_name="gpt-4o",
max_loops=1,
)
# Test different output types
for output_type in ["dict", "dict-all-except-first"]:
workflow = ConcurrentWorkflow(
name=f"Output-Type-Test-{output_type}",
description=f"Testing output type: {output_type}",
agents=[technical_agent, business_agent, legal_agent],
max_loops=1,
output_type=output_type,
)
result = workflow.run("Evaluate AI implementation strategy")
assert result is not None
# The result structure depends on output_type, just ensure it's not None
assert len(result) == 1
assert isinstance(result[0], dict)
def test_concurrent_workflow_real_world_scenario():
"""Test ConcurrentWorkflow in a realistic business scenario"""
# Create agents representing different departments
marketing_agent = Agent(
agent_name="Marketing-Director",
agent_description="Senior marketing director with 15 years experience",
model_name="gpt-4o",
max_loops=1,
)
product_agent = Agent(
agent_name="Product-Manager",
agent_description="Product manager specializing in AI/ML products",
model_name="gpt-4o",
max_loops=1,
)
engineering_agent = Agent(
agent_name="Lead-Engineer",
agent_description="Senior software engineer and technical architect",
model_name="gpt-4o",
max_loops=1,
)
sales_agent = Agent(
agent_name="Sales-Executive",
agent_description="Enterprise sales executive with tech background",
model_name="gpt-4o",
max_loops=1,
)
workflow = ConcurrentWorkflow(
name="Product-Launch-Review-Workflow",
description="Cross-functional team reviewing new AI product launch strategy",
agents=[
marketing_agent,
product_agent,
engineering_agent,
sales_agent,
],
max_loops=1,
)
# Test with a realistic business scenario
result = workflow.run(
"Review and provide recommendations for our new AI-powered analytics platform launch. "
"Consider market positioning, technical feasibility, competitive landscape, and sales strategy."
)
assert result is not None
assert isinstance(result, list)
assert len(result) == 4
for r in result:
assert isinstance(r, dict)
assert "agent" in r
assert "output" in r
# Output content may vary, just check structure
def test_concurrent_workflow_team_collaboration():
"""Test ConcurrentWorkflow with team collaboration features"""
# Create agents that would naturally collaborate
data_scientist = Agent(
agent_name="Data-Scientist",
agent_description="ML engineer and data scientist",
model_name="gpt-4o",
max_loops=1,
)
ux_designer = Agent(
agent_name="UX-Designer",
agent_description="User experience designer and researcher",
model_name="gpt-4o",
max_loops=1,
)
product_owner = Agent(
agent_name="Product-Owner",
agent_description="Product owner with business and technical background",
model_name="gpt-4o",
max_loops=1,
)
qa_engineer = Agent(
agent_name="QA-Engineer",
agent_description="Quality assurance engineer and testing specialist",
model_name="gpt-4o",
max_loops=1,
)
workflow = ConcurrentWorkflow(
name="Cross-Functional-Development-Workflow",
description="Cross-functional team collaborating on feature development",
agents=[
data_scientist,
ux_designer,
product_owner,
qa_engineer,
],
max_loops=1,
)
result = workflow.run(
"Design and plan a new recommendation system for our e-commerce platform. "
"Each team member should provide their perspective on implementation, user experience, "
"business value, and quality assurance considerations."
)
    assert result is not None
    assert isinstance(result, list)
    assert len(result) == 4
    for r in result:
        assert isinstance(r, dict)
        assert "agent" in r
        assert "output" in r


if __name__ == "__main__":
    test_concurrent_workflow_basic_execution()
    test_concurrent_workflow_with_dashboard()
    test_concurrent_workflow_batched_execution()
    test_concurrent_workflow_error_handling()
    test_concurrent_workflow_max_loops_configuration()
    test_concurrent_workflow_different_output_types()
    test_concurrent_workflow_real_world_scenario()
    test_concurrent_workflow_team_collaboration()

File diff suppressed because it is too large

@ -0,0 +1,354 @@
from swarms import Agent
from swarms.structs.hiearchical_swarm import HierarchicalSwarm
def test_hierarchical_swarm_basic_initialization():
"""Test basic HierarchicalSwarm initialization"""
# Create worker agents
research_agent = Agent(
agent_name="Research-Specialist",
agent_description="Specialist in research and data collection",
model_name="gpt-4o",
max_loops=1,
)
analysis_agent = Agent(
agent_name="Analysis-Expert",
agent_description="Expert in data analysis and insights",
model_name="gpt-4o",
max_loops=1,
)
implementation_agent = Agent(
agent_name="Implementation-Manager",
agent_description="Manager for implementation and execution",
model_name="gpt-4o",
max_loops=1,
)
# Create swarm with agents
swarm = HierarchicalSwarm(
name="Research-Analysis-Implementation-Swarm",
description="Hierarchical swarm for comprehensive project execution",
agents=[research_agent, analysis_agent, implementation_agent],
max_loops=1,
)
# Verify initialization
assert swarm.name == "Research-Analysis-Implementation-Swarm"
assert (
swarm.description
== "Hierarchical swarm for comprehensive project execution"
)
assert len(swarm.agents) == 3
assert swarm.max_loops == 1
assert swarm.director is not None
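    # Note: no director was passed in above, so this assertion relies on HierarchicalSwarm
    # creating a default director agent when one is not supplied.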
def test_hierarchical_swarm_with_director():
"""Test HierarchicalSwarm with custom director"""
# Create a custom director
director = Agent(
agent_name="Project-Director",
agent_description="Senior project director with extensive experience",
model_name="gpt-4o",
max_loops=1,
)
# Create worker agents
developer = Agent(
agent_name="Senior-Developer",
agent_description="Senior software developer",
model_name="gpt-4o",
max_loops=1,
)
tester = Agent(
agent_name="QA-Lead",
agent_description="Quality assurance lead",
model_name="gpt-4o",
max_loops=1,
)
# Create swarm with custom director
swarm = HierarchicalSwarm(
name="Software-Development-Swarm",
description="Hierarchical swarm for software development projects",
director=director,
agents=[developer, tester],
max_loops=2,
)
assert swarm.director == director
assert len(swarm.agents) == 2
assert swarm.max_loops == 2
def test_hierarchical_swarm_execution():
"""Test HierarchicalSwarm execution with multiple agents"""
# Create specialized agents
market_researcher = Agent(
agent_name="Market-Researcher",
agent_description="Market research specialist",
model_name="gpt-4o",
max_loops=1,
)
product_strategist = Agent(
agent_name="Product-Strategist",
agent_description="Product strategy and planning expert",
model_name="gpt-4o",
max_loops=1,
)
technical_architect = Agent(
agent_name="Technical-Architect",
agent_description="Technical architecture and design specialist",
model_name="gpt-4o",
max_loops=1,
)
risk_analyst = Agent(
agent_name="Risk-Analyst",
agent_description="Risk assessment and mitigation specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create hierarchical swarm
swarm = HierarchicalSwarm(
name="Product-Development-Swarm",
description="Comprehensive product development hierarchical swarm",
agents=[
market_researcher,
product_strategist,
technical_architect,
risk_analyst,
],
max_loops=1,
verbose=True,
)
# Execute swarm
result = swarm.run(
"Develop a comprehensive strategy for a new AI-powered healthcare platform"
)
# Verify result structure
assert result is not None
# HierarchicalSwarm returns a SwarmSpec or conversation history, just ensure it's not None
def test_hierarchical_swarm_multiple_loops():
"""Test HierarchicalSwarm with multiple feedback loops"""
# Create agents for iterative refinement
planner = Agent(
agent_name="Strategic-Planner",
agent_description="Strategic planning and project management",
model_name="gpt-4o",
max_loops=1,
)
executor = Agent(
agent_name="Task-Executor",
agent_description="Task execution and implementation",
model_name="gpt-4o",
max_loops=1,
)
reviewer = Agent(
agent_name="Quality-Reviewer",
agent_description="Quality assurance and review specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create swarm with multiple loops for iterative refinement
swarm = HierarchicalSwarm(
name="Iterative-Development-Swarm",
description="Hierarchical swarm with iterative feedback loops",
agents=[planner, executor, reviewer],
max_loops=3, # Allow multiple iterations
verbose=True,
)
# Execute with multiple loops
result = swarm.run(
"Create a detailed project plan for implementing a machine learning recommendation system"
)
assert result is not None
def test_hierarchical_swarm_error_handling():
"""Test HierarchicalSwarm error handling"""
# Test with empty agents list
try:
swarm = HierarchicalSwarm(agents=[])
assert (
False
), "Should have raised ValueError for empty agents list"
except ValueError as e:
assert "agents" in str(e).lower() or "empty" in str(e).lower()
# Test with invalid max_loops
researcher = Agent(
agent_name="Test-Researcher",
agent_description="Test researcher",
model_name="gpt-4o",
max_loops=1,
)
try:
swarm = HierarchicalSwarm(agents=[researcher], max_loops=0)
assert (
False
), "Should have raised ValueError for invalid max_loops"
except ValueError as e:
assert "max_loops" in str(e).lower() or "0" in str(e)
def test_hierarchical_swarm_collaboration_prompts():
"""Test HierarchicalSwarm with collaboration prompts enabled"""
# Create agents
data_analyst = Agent(
agent_name="Data-Analyst",
agent_description="Data analysis specialist",
model_name="gpt-4o",
max_loops=1,
)
business_analyst = Agent(
agent_name="Business-Analyst",
agent_description="Business analysis specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create swarm with collaboration prompts
swarm = HierarchicalSwarm(
name="Collaborative-Analysis-Swarm",
description="Hierarchical swarm with enhanced collaboration",
agents=[data_analyst, business_analyst],
max_loops=1,
add_collaboration_prompt=True,
)
# Check that collaboration prompts were added to agents
assert data_analyst.system_prompt is not None
assert business_analyst.system_prompt is not None
# Execute swarm
result = swarm.run(
"Analyze customer behavior patterns and provide business recommendations"
)
assert result is not None
def test_hierarchical_swarm_with_dashboard():
"""Test HierarchicalSwarm with interactive dashboard"""
# Create agents
content_creator = Agent(
agent_name="Content-Creator",
agent_description="Content creation specialist",
model_name="gpt-4o",
max_loops=1,
)
editor = Agent(
agent_name="Editor",
agent_description="Content editor and proofreader",
model_name="gpt-4o",
max_loops=1,
)
publisher = Agent(
agent_name="Publisher",
agent_description="Publishing and distribution specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create swarm with interactive dashboard
swarm = HierarchicalSwarm(
name="Content-Publishing-Swarm",
description="Hierarchical swarm for content creation and publishing",
agents=[content_creator, editor, publisher],
max_loops=1,
interactive=True,
verbose=True,
)
# Verify dashboard was created
assert swarm.dashboard is not None
assert swarm.interactive is True
# Execute swarm
result = swarm.run(
"Create a comprehensive guide on machine learning best practices"
)
assert result is not None
def test_hierarchical_swarm_real_world_scenario():
"""Test HierarchicalSwarm in a realistic business scenario"""
# Create agents representing different business functions
market_intelligence = Agent(
agent_name="Market-Intelligence-Director",
agent_description="Director of market intelligence and competitive analysis",
model_name="gpt-4o",
max_loops=1,
)
product_strategy = Agent(
agent_name="Product-Strategy-Manager",
agent_description="Product strategy and roadmap manager",
model_name="gpt-4o",
max_loops=1,
)
engineering_lead = Agent(
agent_name="Engineering-Lead",
agent_description="Senior engineering lead and technical architect",
model_name="gpt-4o",
max_loops=1,
)
operations_manager = Agent(
agent_name="Operations-Manager",
agent_description="Operations and implementation manager",
model_name="gpt-4o",
max_loops=1,
)
compliance_officer = Agent(
agent_name="Compliance-Officer",
agent_description="Legal compliance and regulatory specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create comprehensive hierarchical swarm
swarm = HierarchicalSwarm(
name="Enterprise-Strategy-Swarm",
description="Enterprise-level strategic planning and execution swarm",
agents=[
market_intelligence,
product_strategy,
engineering_lead,
operations_manager,
compliance_officer,
],
max_loops=2,
verbose=True,
add_collaboration_prompt=True,
)
# Test with complex enterprise scenario
result = swarm.run(
"Develop a comprehensive 5-year strategic plan for our company to become a leader in "
"AI-powered enterprise solutions. Consider market opportunities, competitive landscape, "
"technical requirements, operational capabilities, and regulatory compliance."
)
assert result is not None

@ -23,11 +23,8 @@ from swarms.structs import (
from swarms.structs.hiearchical_swarm import HierarchicalSwarm
from swarms.structs.tree_swarm import ForestSwarm, Tree, TreeAgent
# Load environment variables
load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")
def generate_timestamp() -> str:
"""Generate a timestamp string for filenames"""
@ -82,58 +79,6 @@ def write_markdown_report(
f.write("---\n\n")
# def create_github_issue(test_result: Dict[str, Any]) -> Dict[str, Any]:
# """Create a GitHub issue for a failed test"""
# if not all([GITHUB_TOKEN, GITHUB_REPO_OWNER, GITHUB_REPO_NAME]):
# logger.warning("GitHub credentials not configured. Skipping issue creation.")
# return None
# if test_result["status"] != "failed":
# return None
# issue_title = f"Automated Test Failure: {test_result['test_name']}"
# issue_body = f"""
# ## Test Failure Report
# - **Test Name**: `{test_result['test_name']}`
# - **Timestamp**: `{datetime.now().isoformat()}`
# - **Status**: {test_result['status']}
# ### Error Information
# ```
# {test_result.get('error', 'No error message available')}
# ```
# ### Response (if available)
# ```json
# {json.dumps(test_result.get('response', {}), indent=2)}
# ```
# ---
# *This issue was automatically generated by the Swarms testing workflow.*
# """
# payload = {
# "title": issue_title,
# "body": issue_body,
# "labels": ["bug", "test-failure", "automated-report"],
# }
# try:
# response = requests.post(
# f"{BASE_URL}/repos/{GITHUB_REPO_OWNER}/{GITHUB_REPO_NAME}/issues",
# headers=GITHUB_HEADERS,
# json=payload,
# )
# response.raise_for_status()
# logger.info(f"Created GitHub issue for {test_result['test_name']}")
# return response.json()
# except requests.exceptions.RequestException as e:
# logger.error(f"Failed to create GitHub issue: {e.response.text if e.response else str(e)}")
# return None
def create_test_agent(
name: str,
system_prompt: str = None,
@ -939,10 +884,4 @@ def run_all_tests():
if __name__ == "__main__":
    run_all_tests()

@ -1,152 +1,203 @@
from unittest.mock import MagicMock
import pytest
from swarms.structs.agent import Agent
from swarms.structs.majority_voting import MajorityVoting
def test_majority_voting_run_concurrent(mocker):
# Create mock agents
agent1 = MagicMock(spec=Agent)
agent2 = MagicMock(spec=Agent)
agent3 = MagicMock(spec=Agent)
def test_majority_voting_basic_execution():
"""Test basic MajorityVoting execution with multiple agents"""
# Create specialized agents with different perspectives
geographer = Agent(
agent_name="Geography-Expert",
agent_description="Expert in geography and world capitals",
model_name="gpt-4o",
max_loops=1,
)
# Create mock majority voting
mv = MajorityVoting(
agents=[agent1, agent2, agent3],
concurrent=True,
multithreaded=False,
historian = Agent(
agent_name="History-Scholar",
agent_description="Historical and cultural context specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create mock conversation
conversation = MagicMock()
mv.conversation = conversation
political_analyst = Agent(
agent_name="Political-Analyst",
agent_description="Political and administrative specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create mock results
results = ["Paris", "Paris", "Lyon"]
# Create majority voting system
mv = MajorityVoting(
name="Geography-Consensus-System",
description="Majority voting system for geographical questions",
agents=[geographer, historian, political_analyst],
max_loops=1,
verbose=True,
)
# Mock agent.run method
agent1.run.return_value = results[0]
agent2.run.return_value = results[1]
agent3.run.return_value = results[2]
# Test execution
result = mv.run("What is the capital city of France?")
assert result is not None
# Run majority voting
majority_vote = mv.run("What is the capital of France?")
# Assert agent.run method was called with the correct task
agent1.run.assert_called_once_with(
"What is the capital of France?"
)
agent2.run.assert_called_once_with(
"What is the capital of France?"
)
agent3.run.assert_called_once_with(
"What is the capital of France?"
def test_majority_voting_multiple_loops():
"""Test MajorityVoting with multiple loops for consensus refinement"""
# Create agents with different knowledge bases
trivia_expert = Agent(
agent_name="Trivia-Expert",
agent_description="General knowledge and trivia specialist",
model_name="gpt-4o",
max_loops=1,
)
# Assert conversation.add method was called with the correct responses
conversation.add.assert_any_call(agent1.agent_name, results[0])
conversation.add.assert_any_call(agent2.agent_name, results[1])
conversation.add.assert_any_call(agent3.agent_name, results[2])
# Assert majority vote is correct
assert majority_vote is not None
research_analyst = Agent(
agent_name="Research-Analyst",
agent_description="Research and fact-checking specialist",
model_name="gpt-4o",
max_loops=1,
)
def test_majority_voting_run_multithreaded(mocker):
# Create mock agents
agent1 = MagicMock(spec=Agent)
agent2 = MagicMock(spec=Agent)
agent3 = MagicMock(spec=Agent)
subject_matter_expert = Agent(
agent_name="Subject-Matter-Expert",
agent_description="Deep subject matter expertise specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create mock majority voting
# Create majority voting with multiple loops for iterative refinement
mv = MajorityVoting(
agents=[agent1, agent2, agent3],
concurrent=False,
multithreaded=True,
name="Multi-Loop-Consensus-System",
description="Majority voting with iterative consensus refinement",
agents=[
trivia_expert,
research_analyst,
subject_matter_expert,
],
max_loops=3, # Allow multiple iterations
verbose=True,
)
# Create mock conversation
conversation = MagicMock()
mv.conversation = conversation
# Create mock results
results = ["Paris", "Paris", "Lyon"]
# Mock agent.run method
agent1.run.return_value = results[0]
agent2.run.return_value = results[1]
agent3.run.return_value = results[2]
# Test multi-loop execution
result = mv.run(
"What are the main causes of climate change and what can be done to mitigate them?"
)
assert result is not None
# Run majority voting
majority_vote = mv.run("What is the capital of France?")
# Assert agent.run method was called with the correct task
agent1.run.assert_called_once_with(
"What is the capital of France?"
)
agent2.run.assert_called_once_with(
"What is the capital of France?"
)
agent3.run.assert_called_once_with(
"What is the capital of France?"
def test_majority_voting_business_scenario():
"""Test MajorityVoting in a realistic business scenario"""
# Create agents representing different business perspectives
market_strategist = Agent(
agent_name="Market-Strategist",
agent_description="Market strategy and competitive analysis specialist",
model_name="gpt-4o",
max_loops=1,
)
# Assert conversation.add method was called with the correct responses
conversation.add.assert_any_call(agent1.agent_name, results[0])
conversation.add.assert_any_call(agent2.agent_name, results[1])
conversation.add.assert_any_call(agent3.agent_name, results[2])
financial_analyst = Agent(
agent_name="Financial-Analyst",
agent_description="Financial modeling and ROI analysis specialist",
model_name="gpt-4o",
max_loops=1,
)
# Assert majority vote is correct
assert majority_vote is not None
technical_architect = Agent(
agent_name="Technical-Architect",
agent_description="Technical feasibility and implementation specialist",
model_name="gpt-4o",
max_loops=1,
)
risk_manager = Agent(
agent_name="Risk-Manager",
agent_description="Risk assessment and compliance specialist",
model_name="gpt-4o",
max_loops=1,
)
@pytest.mark.asyncio
async def test_majority_voting_run_asynchronous(mocker):
# Create mock agents
agent1 = MagicMock(spec=Agent)
agent2 = MagicMock(spec=Agent)
agent3 = MagicMock(spec=Agent)
operations_expert = Agent(
agent_name="Operations-Expert",
agent_description="Operations and implementation specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create mock majority voting
# Create majority voting for business decisions
mv = MajorityVoting(
agents=[agent1, agent2, agent3],
concurrent=False,
multithreaded=False,
asynchronous=True,
name="Business-Decision-Consensus",
description="Majority voting system for business strategic decisions",
agents=[
market_strategist,
financial_analyst,
technical_architect,
risk_manager,
operations_expert,
],
max_loops=2,
verbose=True,
)
# Create mock conversation
conversation = MagicMock()
mv.conversation = conversation
# Create mock results
results = ["Paris", "Paris", "Lyon"]
# Test with complex business decision
result = mv.run(
"Should our company invest in developing an AI-powered customer service platform? "
"Consider market demand, financial implications, technical feasibility, risk factors, "
"and operational requirements."
)
# Mock agent.run method
agent1.run.return_value = results[0]
agent2.run.return_value = results[1]
agent3.run.return_value = results[2]
assert result is not None
# Run majority voting
majority_vote = await mv.run("What is the capital of France?")
# Assert agent.run method was called with the correct task
agent1.run.assert_called_once_with(
"What is the capital of France?"
def test_majority_voting_error_handling():
"""Test MajorityVoting error handling and validation"""
# Test with empty agents list
try:
mv = MajorityVoting(agents=[])
assert (
False
), "Should have raised ValueError for empty agents list"
except ValueError as e:
assert "agents" in str(e).lower() or "empty" in str(e).lower()
# Test with invalid max_loops
analyst = Agent(
agent_name="Test-Analyst",
agent_description="Test analyst",
model_name="gpt-4o",
max_loops=1,
)
agent2.run.assert_called_once_with(
"What is the capital of France?"
try:
mv = MajorityVoting(agents=[analyst], max_loops=0)
assert (
False
), "Should have raised ValueError for invalid max_loops"
except ValueError as e:
assert "max_loops" in str(e).lower() or "0" in str(e)
def test_majority_voting_different_output_types():
"""Test MajorityVoting with different output types"""
# Create agents for technical analysis
security_expert = Agent(
agent_name="Security-Expert",
agent_description="Cybersecurity and data protection specialist",
model_name="gpt-4o",
max_loops=1,
)
agent3.run.assert_called_once_with(
"What is the capital of France?"
compliance_officer = Agent(
agent_name="Compliance-Officer",
agent_description="Regulatory compliance and legal specialist",
model_name="gpt-4o",
max_loops=1,
)
# Assert conversation.add method was called with the correct responses
conversation.add.assert_any_call(agent1.agent_name, results[0])
conversation.add.assert_any_call(agent2.agent_name, results[1])
conversation.add.assert_any_call(agent3.agent_name, results[2])
privacy_advocate = Agent(
agent_name="Privacy-Advocate",
agent_description="Privacy protection and data rights specialist",
model_name="gpt-4o",
max_loops=1,
)
# Assert majority vote is correct
assert majority_vote is not None

@ -1,84 +1,268 @@
import pytest
from unittest.mock import Mock, patch
from swarms.structs.mixture_of_agents import MixtureOfAgents
from swarms.structs.agent import Agent
from swarms_memory import BaseVectorDatabase
def test_init():
with patch.object(
MixtureOfAgents, "agent_check"
) as mock_agent_check, patch.object(
MixtureOfAgents, "final_agent_check"
) as mock_final_agent_check, patch.object(
MixtureOfAgents, "swarm_initialization"
) as mock_swarm_initialization, patch.object(
MixtureOfAgents, "communication_protocol"
) as mock_communication_protocol:
agents = [Mock(spec=Agent)]
final_agent = Mock(spec=Agent)
scp = Mock(spec=BaseVectorDatabase)
MixtureOfAgents(
agents=agents, final_agent=final_agent, scp=scp
)
mock_agent_check.assert_called_once()
mock_final_agent_check.assert_called_once()
mock_swarm_initialization.assert_called_once()
mock_communication_protocol.assert_called_once()
def test_communication_protocol():
agents = [Mock(spec=Agent)]
final_agent = Mock(spec=Agent)
scp = Mock(spec=BaseVectorDatabase)
swarm = MixtureOfAgents(
agents=agents, final_agent=final_agent, scp=scp
)
swarm.communication_protocol()
for agent in agents:
agent.long_term_memory.assert_called_once_with(scp)
def test_agent_check():
final_agent = Mock(spec=Agent)
with pytest.raises(TypeError):
MixtureOfAgents(agents="not a list", final_agent=final_agent)
with pytest.raises(TypeError):
MixtureOfAgents(
agents=["not an agent"], final_agent=final_agent
)
def test_final_agent_check():
agents = [Mock(spec=Agent)]
with pytest.raises(TypeError):
MixtureOfAgents(agents=agents, final_agent="not an agent")
def test_swarm_initialization():
    with patch(
        "swarms.structs.mixture_of_agents.logger"
    ) as mock_logger:
        agents = [Mock(spec=Agent)]
        final_agent = Mock(spec=Agent)
        swarm = MixtureOfAgents(
            agents=agents, final_agent=final_agent
        )
        swarm.swarm_initialization()
        assert mock_logger.info.call_count == 3


def test_run():
    with patch("swarms.structs.mixture_of_agents.logger"), patch(
        "builtins.open", new_callable=Mock
    ) as mock_open:
        agents = [Mock(spec=Agent)]
        final_agent = Mock(spec=Agent)
        swarm = MixtureOfAgents(
            agents=agents, final_agent=final_agent
        )
        swarm.run("task")
        for agent in agents:
            agent.run.assert_called_once()
        final_agent.run.assert_called_once()
        mock_open.assert_called_once_with(swarm.saved_file_name, "w")


def test_mixture_of_agents_basic_initialization():
    """Test basic MixtureOfAgents initialization with multiple agents"""
    # Create multiple specialized agents
    research_agent = Agent(
        agent_name="Research-Specialist",
        agent_description="Specialist in research and data collection",
        model_name="gpt-4o",
        max_loops=1,
    )
    analysis_agent = Agent(
        agent_name="Analysis-Expert",
        agent_description="Expert in data analysis and insights",
        model_name="gpt-4o",
        max_loops=1,
    )
    strategy_agent = Agent(
        agent_name="Strategy-Consultant",
        agent_description="Strategy and planning consultant",
        model_name="gpt-4o",
        max_loops=1,
    )
    # Create aggregator agent
    aggregator = Agent(
        agent_name="Aggregator-Agent",
        agent_description="Agent that aggregates responses from other agents",
        model_name="gpt-4o",
        max_loops=1,
    )
    # Create mixture of agents
    moa = MixtureOfAgents(
        name="Business-Analysis-Mixture",
        description="Mixture of agents for comprehensive business analysis",
        agents=[research_agent, analysis_agent, strategy_agent],
        aggregator_agent=aggregator,
        layers=3,
        max_loops=1,
    )
    # Verify initialization
    assert moa.name == "Business-Analysis-Mixture"
    assert (
        moa.description
        == "Mixture of agents for comprehensive business analysis"
    )
    assert len(moa.agents) == 3
    assert moa.aggregator_agent == aggregator
    assert moa.layers == 3
    assert moa.max_loops == 1
def test_mixture_of_agents_execution():
"""Test MixtureOfAgents execution with multiple agents"""
# Create diverse agents for different perspectives
market_analyst = Agent(
agent_name="Market-Analyst",
agent_description="Market analysis and trend specialist",
model_name="gpt-4o",
max_loops=1,
)
technical_expert = Agent(
agent_name="Technical-Expert",
agent_description="Technical feasibility and implementation specialist",
model_name="gpt-4o",
max_loops=1,
)
financial_analyst = Agent(
agent_name="Financial-Analyst",
agent_description="Financial modeling and ROI specialist",
model_name="gpt-4o",
max_loops=1,
)
risk_assessor = Agent(
agent_name="Risk-Assessor",
agent_description="Risk assessment and mitigation specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create aggregator for synthesis
aggregator = Agent(
agent_name="Executive-Summary-Agent",
agent_description="Executive summary and recommendation specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create mixture of agents
moa = MixtureOfAgents(
name="Comprehensive-Evaluation-Mixture",
description="Mixture of agents for comprehensive business evaluation",
agents=[
market_analyst,
technical_expert,
financial_analyst,
risk_assessor,
],
aggregator_agent=aggregator,
layers=2,
max_loops=1,
)
# Test execution
result = moa.run(
"Evaluate the feasibility of launching an AI-powered healthcare platform"
)
assert result is not None
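    # An offline variant of this test could stub the model call, for example with
    # patch.object(Agent, "run", return_value="stubbed analysis"), mirroring how
    # the mocked test_run above avoids live LLM requests.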
def test_mixture_of_agents_multiple_layers():
"""Test MixtureOfAgents with multiple layers"""
# Create agents for layered analysis
data_collector = Agent(
agent_name="Data-Collector",
agent_description="Data collection and research specialist",
model_name="gpt-4o",
max_loops=1,
)
pattern_analyzer = Agent(
agent_name="Pattern-Analyzer",
agent_description="Pattern recognition and analysis specialist",
model_name="gpt-4o",
max_loops=1,
)
insight_generator = Agent(
agent_name="Insight-Generator",
agent_description="Insight generation and interpretation specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create aggregator
final_aggregator = Agent(
agent_name="Final-Aggregator",
agent_description="Final aggregation and conclusion specialist",
model_name="gpt-4o",
max_loops=1,
)
# Create mixture with multiple layers for deeper analysis
moa = MixtureOfAgents(
name="Multi-Layer-Analysis-Mixture",
description="Mixture of agents with multiple analysis layers",
agents=[data_collector, pattern_analyzer, insight_generator],
aggregator_agent=final_aggregator,
layers=4,
max_loops=1,
)
# Test multi-layer execution
result = moa.run(
"Analyze customer behavior patterns and provide strategic insights"
)
assert result is not None
def test_mixture_of_agents_error_handling():
"""Test MixtureOfAgents error handling and validation"""
# Test with empty agents list
try:
moa = MixtureOfAgents(agents=[])
assert (
False
), "Should have raised ValueError for empty agents list"
except ValueError as e:
assert "No agents provided" in str(e)
# Test with invalid aggregator system prompt
analyst = Agent(
agent_name="Test-Analyst",
agent_description="Test analyst",
model_name="gpt-4o",
max_loops=1,
)
try:
moa = MixtureOfAgents(
agents=[analyst], aggregator_system_prompt=""
)
assert (
False
), "Should have raised ValueError for empty system prompt"
except ValueError as e:
assert "No aggregator system prompt" in str(e)
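    # Note that these assertions depend on the exact error strings
    # ("No agents provided", "No aggregator system prompt") raised by
    # MixtureOfAgents, so they must track any change to that wording.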
def test_mixture_of_agents_real_world_scenario():
"""Test MixtureOfAgents in a realistic business scenario"""
# Create agents representing different business functions
marketing_director = Agent(
agent_name="Marketing-Director",
agent_description="Senior marketing director with market expertise",
model_name="gpt-4o",
max_loops=1,
)
product_manager = Agent(
agent_name="Product-Manager",
agent_description="Product strategy and development manager",
model_name="gpt-4o",
max_loops=1,
)
engineering_lead = Agent(
agent_name="Engineering-Lead",
agent_description="Senior engineering and technical architecture lead",
model_name="gpt-4o",
max_loops=1,
)
sales_executive = Agent(
agent_name="Sales-Executive",
agent_description="Enterprise sales and customer relationship executive",
model_name="gpt-4o",
max_loops=1,
)
legal_counsel = Agent(
agent_name="Legal-Counsel",
agent_description="Legal compliance and regulatory counsel",
model_name="gpt-4o",
max_loops=1,
)
# Create aggregator for executive decision making
executive_aggregator = Agent(
agent_name="Executive-Decision-Maker",
agent_description="Executive decision maker and strategic aggregator",
model_name="gpt-4o",
max_loops=1,
)
# Create comprehensive mixture of agents
moa = MixtureOfAgents(
name="Executive-Board-Mixture",
description="Mixture of agents representing executive board for strategic decisions",
agents=[
marketing_director,
product_manager,
engineering_lead,
sales_executive,
legal_counsel,
],
aggregator_agent=executive_aggregator,
layers=3,
max_loops=1,
)
# Test with complex business scenario
result = moa.run(
"Develop a comprehensive go-to-market strategy for our new AI-powered enterprise platform. "
"Consider market positioning, technical requirements, competitive landscape, sales channels, "
"and legal compliance requirements."
)
assert result is not None
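# A minimal guard for this module, assuming the conventional OPENAI_API_KEY
# environment variable: every MixtureOfAgents test above constructs live gpt-4o
# agents, so decorating those tests with @requires_llm would skip them when no
# provider key is configured.
import os

import pytest

requires_llm = pytest.mark.skipif(
    not os.getenv("OPENAI_API_KEY"),
    reason="OPENAI_API_KEY is not set; skipping tests that call a live model",
)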

@ -1,201 +0,0 @@
import json
import os
from unittest.mock import Mock
import pytest
from swarms import Agent
from swarm_models import OpenAIChat
from experimental.multi_agent_collab import MultiAgentCollaboration
# Initialize the director agent
director = Agent(
agent_name="Director",
system_prompt="Directs the tasks for the workers",
llm=OpenAIChat(),
max_loops=1,
dashboard=False,
streaming_on=True,
verbose=True,
stopping_token="<DONE>",
state_save_file_type="json",
saved_state_path="director.json",
)
# Initialize worker 1
worker1 = Agent(
agent_name="Worker1",
system_prompt="Generates a transcript for a youtube video on what swarms are",
llm=OpenAIChat(),
max_loops=1,
dashboard=False,
streaming_on=True,
verbose=True,
stopping_token="<DONE>",
state_save_file_type="json",
saved_state_path="worker1.json",
)
# Initialize worker 2
worker2 = Agent(
agent_name="Worker2",
system_prompt="Summarizes the transcript generated by Worker1",
llm=OpenAIChat(),
max_loops=1,
dashboard=False,
streaming_on=True,
verbose=True,
stopping_token="<DONE>",
state_save_file_type="json",
saved_state_path="worker2.json",
)
# Create a list of agents
agents = [director, worker1, worker2]
@pytest.fixture
def collaboration():
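    # Build a fresh MultiAgentCollaboration over the director/worker agents for
    # each test so step counts, history, and results do not leak between tests.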
return MultiAgentCollaboration(agents)
def test_collaboration_initialization(collaboration):
assert len(collaboration.agents) == 3
assert callable(collaboration.select_next_speaker)
assert collaboration.max_loops == 10
assert collaboration.results == []
assert collaboration.logging is True
def test_reset(collaboration):
collaboration.reset()
for agent in collaboration.agents:
assert agent.step == 0
def test_inject(collaboration):
collaboration.inject("TestName", "TestMessage")
for agent in collaboration.agents:
assert "TestName" in agent.history[-1]
assert "TestMessage" in agent.history[-1]
def test_inject_agent(collaboration):
agent3 = Agent(llm=OpenAIChat(), max_loops=2)
collaboration.inject_agent(agent3)
assert len(collaboration.agents) == 3
assert agent3 in collaboration.agents
def test_step(collaboration):
collaboration.step()
for agent in collaboration.agents:
assert agent.step == 1
def test_ask_for_bid(collaboration):
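    # The mocked agent advertises its bid as the string "<5>"; ask_for_bid is
    # expected to parse the integer 5 out of that wrapper.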
agent = Mock()
agent.bid.return_value = "<5>"
bid = collaboration.ask_for_bid(agent)
assert bid == 5
def test_select_next_speaker(collaboration):
collaboration.select_next_speaker = Mock(return_value=0)
idx = collaboration.select_next_speaker(1, collaboration.agents)
assert idx == 0
def test_run(collaboration):
collaboration.run()
for agent in collaboration.agents:
assert agent.step == collaboration.max_loops
def test_format_results(collaboration):
collaboration.results = [
{"agent": "Agent1", "response": "Response1"}
]
formatted_results = collaboration.format_results(
collaboration.results
)
assert "Agent1 responded: Response1" in formatted_results
def test_save_and_load(collaboration):
collaboration.save()
loaded_state = collaboration.load()
assert loaded_state["_step"] == collaboration._step
assert loaded_state["results"] == collaboration.results
def test_performance(collaboration):
performance_data = collaboration.performance()
for agent in collaboration.agents:
assert agent.name in performance_data
assert "metrics" in performance_data[agent.name]
def test_set_interaction_rules(collaboration):
rules = {"rule1": "action1", "rule2": "action2"}
collaboration.set_interaction_rules(rules)
assert hasattr(collaboration, "interaction_rules")
assert collaboration.interaction_rules == rules
def test_repr(collaboration):
repr_str = repr(collaboration)
assert isinstance(repr_str, str)
assert "MultiAgentCollaboration" in repr_str
def test_load(collaboration):
state = {
"step": 5,
"results": [{"agent": "Agent1", "response": "Response1"}],
}
with open(collaboration.saved_file_path_name, "w") as file:
json.dump(state, file)
loaded_state = collaboration.load()
assert loaded_state["_step"] == state["step"]
assert loaded_state["results"] == state["results"]
def test_save(collaboration, tmp_path):
collaboration.saved_file_path_name = tmp_path / "test_save.json"
collaboration.save()
with open(collaboration.saved_file_path_name) as file:
saved_data = json.load(file)
assert saved_data["_step"] == collaboration._step
assert saved_data["results"] == collaboration.results
# Add more tests here...
# Add more parameterized tests for different scenarios...
# Example of exception testing
def test_exception_handling(collaboration):
agent = Mock()
agent.bid.side_effect = ValueError("Invalid bid")
with pytest.raises(ValueError):
collaboration.ask_for_bid(agent)
# Add more exception testing...
# Example of environment variable testing (if applicable)
@pytest.mark.parametrize("env_var", ["ENV_VAR_1", "ENV_VAR_2"])
def test_environment_variables(collaboration, monkeypatch, env_var):
monkeypatch.setenv(env_var, "test_value")
assert os.getenv(env_var) == "test_value"
