parent
4fa0d6b311
commit
bb46bd9f94
@ -1,19 +0,0 @@
|
||||
from swarms.sims.senator_assembly import SenatorAssembly
|
||||
|
||||
|
||||
def main():
    """
    Simulate a Senate vote on a bill to deregulate the U.S. IPO market.

    Creates a SenatorAssembly with its default arguments and hands the bill
    text to `simulate_vote_concurrent` with `batch_size=10`.
    """
    ipo_deregulation_bill = "A bill proposing to deregulate the IPO (Initial Public Offering) market in the United States as extensively as possible. The bill seeks to remove or significantly reduce existing regulatory requirements and oversight for companies seeking to go public, with the aim of increasing market efficiency and access to capital. Senators must consider the potential economic, legal, and ethical consequences of such broad deregulation, and cast their votes accordingly."

    assembly = SenatorAssembly()
    assembly.simulate_vote_concurrent(
        ipo_deregulation_bill,
        batch_size=10,
    )


if __name__ == "__main__":
    main()
|
@ -1,170 +0,0 @@
|
||||
from loguru import logger
|
||||
from swarms.structs.swarm_eval import (
|
||||
SwarmEvaluator,
|
||||
PRESET_DATASETS,
|
||||
)
|
||||
|
||||
import os
|
||||
from swarms import Agent
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from swarm_models import OpenAIChat
|
||||
|
||||
# Load environment variables (via python-dotenv) before the API key is read
# below.
load_dotenv()


# Chat model client pointed at the Together API base URL. The key comes from
# the TOGETHER_API_KEY environment variable; os.getenv returns None if unset.
model = OpenAIChat(
    model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
    openai_api_key=os.getenv("TOGETHER_API_KEY"),
    base_url="https://api.together.xyz/v1",
)
|
||||
|
||||
# Define system prompts for reasoning agents
|
||||
THINKING_AGENT_PROMPT = """You are a sophisticated analytical and strategic thinking agent focused on deep problem analysis and solution design.
|
||||
|
||||
Your core capabilities include:
|
||||
1. Comprehensive Problem Analysis
|
||||
- Break down complex problems into constituent elements
|
||||
- Map relationships and dependencies between components
|
||||
- Identify root causes and underlying patterns
|
||||
- Consider historical context and precedents
|
||||
|
||||
2. Multi-Perspective Evaluation
|
||||
- Examine issues from multiple stakeholder viewpoints
|
||||
- Consider short-term and long-term implications
|
||||
- Evaluate social, economic, technical, and ethical dimensions
|
||||
- Challenge assumptions and identify potential biases
|
||||
|
||||
3. Risk Assessment and Mitigation
|
||||
- Conduct thorough risk analysis across scenarios
|
||||
- Identify potential failure modes and edge cases
|
||||
- Develop contingency plans and mitigation strategies
|
||||
- Assess probability and impact of various outcomes
|
||||
|
||||
4. Strategic Solution Development
|
||||
- Generate multiple solution approaches
|
||||
- Evaluate trade-offs between different strategies
|
||||
- Consider resource constraints and limitations
|
||||
- Design scalable and sustainable solutions
|
||||
|
||||
5. Decision Framework Creation
|
||||
- Establish clear evaluation criteria
|
||||
- Weight competing priorities appropriately
|
||||
- Create structured decision matrices
|
||||
- Document reasoning and key decision factors
|
||||
|
||||
6. Systems Thinking
|
||||
- Map interconnections between system elements
|
||||
- Identify feedback loops and cascade effects
|
||||
- Consider emergent properties and behaviors
|
||||
- Account for dynamic system evolution
|
||||
|
||||
Your output should always include:
|
||||
- Clear articulation of your analytical process
|
||||
- Key assumptions and their justification
|
||||
- Potential risks and mitigation strategies
|
||||
- Multiple solution options with pros/cons
|
||||
- Specific recommendations with supporting rationale
|
||||
- Areas of uncertainty requiring further investigation
|
||||
|
||||
Focus on developing robust, well-reasoned strategies that account for complexity while remaining practical and actionable."""
|
||||
|
||||
ACTION_AGENT_PROMPT = """You are an advanced implementation and execution agent focused on turning strategic plans into concrete results.
|
||||
|
||||
Your core capabilities include:
|
||||
1. Strategic Implementation Planning
|
||||
- Break down high-level strategies into specific actions
|
||||
- Create detailed project roadmaps and timelines
|
||||
- Identify critical path dependencies
|
||||
- Establish clear milestones and success metrics
|
||||
- Design feedback and monitoring mechanisms
|
||||
|
||||
2. Resource Optimization
|
||||
- Assess resource requirements and constraints
|
||||
- Optimize resource allocation and scheduling
|
||||
- Identify efficiency opportunities
|
||||
- Plan for scalability and flexibility
|
||||
- Manage competing priorities effectively
|
||||
|
||||
3. Execution Management
|
||||
- Develop detailed implementation procedures
|
||||
- Create clear operational guidelines
|
||||
- Establish quality control measures
|
||||
- Design progress tracking systems
|
||||
- Build in review and adjustment points
|
||||
|
||||
4. Risk Management
|
||||
- Implement specific risk mitigation measures
|
||||
- Create early warning systems
|
||||
- Develop contingency procedures
|
||||
- Establish fallback positions
|
||||
- Monitor risk indicators
|
||||
|
||||
5. Stakeholder Management
|
||||
- Identify key stakeholders and their needs
|
||||
- Create communication plans
|
||||
- Establish feedback mechanisms
|
||||
- Manage expectations effectively
|
||||
- Build support and buy-in
|
||||
|
||||
6. Continuous Improvement
|
||||
- Monitor implementation effectiveness
|
||||
- Gather and analyze performance data
|
||||
- Identify improvement opportunities
|
||||
- Implement iterative enhancements
|
||||
- Document lessons learned
|
||||
|
||||
Your output should always include:
|
||||
- Detailed action plans with specific steps
|
||||
- Resource requirements and allocation plans
|
||||
- Timeline with key milestones
|
||||
- Success metrics and monitoring approach
|
||||
- Risk mitigation procedures
|
||||
- Communication and stakeholder management plans
|
||||
- Quality control measures
|
||||
- Feedback and adjustment mechanisms
|
||||
|
||||
Focus on practical, efficient, and effective implementation while maintaining high quality standards and achieving desired outcomes."""
|
||||
|
||||
# Initialize the thinking agent: a single-loop Agent that uses
# THINKING_AGENT_PROMPT as its system prompt and the module-level `model`
# as its LLM.
thinking_agent = Agent(
    agent_name="Strategic-Thinker",
    agent_description="Deep analysis and strategic planning agent",
    system_prompt=THINKING_AGENT_PROMPT,
    max_loops=1,
    llm=model,
    dynamic_temperature_enabled=True,
)
|
||||
|
||||
|
||||
class DeepSeekSwarm:
    """Two-pass wrapper around the module-level `thinking_agent`."""

    def __init__(self):
        # Reuse the shared module-level agent instead of building a new one.
        self.thinking_agent = thinking_agent

    def run(self, task: str):
        """
        Run the thinking agent on `task`, then again on its own output.

        Args:
            task (str): Prompt for the first pass.

        Returns:
            Whatever the second `thinking_agent.run` call returns.
        """
        agent = self.thinking_agent
        draft = agent.run(task)
        return agent.run(draft)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Initialize the swarm (replace with your actual multi-agent system)
    swarm = DeepSeekSwarm()

    # Initialize the evaluator with the swarm instance
    evaluator = SwarmEvaluator(swarm)

    logger.info("Starting evaluation for dataset: gsm8k")

    # Evaluate the "train" split with the gsm8k preset config. The worker
    # count is whatever os.cpu_count() reports (not a fixed number), each
    # task gets up to 3 retries, a progress bar is shown, and the metrics
    # are written to gsm8k_results.txt.
    results = evaluator.evaluate(
        "gsm8k",
        split="train",
        config=PRESET_DATASETS["gsm8k"],
        max_workers=os.cpu_count(),
        max_retries=3,
        show_progress=True,
        output_file="gsm8k_results.txt",
    )

    logger.info(f"Results for gsm8k: {results}")
|
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Basic Graph Workflow Example
|
||||
|
||||
A minimal example showing how to use GraphWorkflow with backend selection.
|
||||
"""
|
||||
|
||||
from swarms.structs.graph_workflow import GraphWorkflow
|
||||
from swarms.structs.agent import Agent
|
||||
|
||||
# Three agents configured with the same model and distinguished only by
# agent_name; main() refers to these names when it adds workflow edges.
agent_one = Agent(agent_name="research_agent", model="gpt-4o-mini")
agent_two = Agent(
    agent_name="research_agent_two", model="gpt-4o-mini"
)
agent_three = Agent(
    agent_name="research_agent_three", model="gpt-4o-mini"
)
|
||||
|
||||
|
||||
def main():
    """
    Build and run a three-agent linear GraphWorkflow.

    The module-level agents are registered as nodes, chained
    research_agent -> research_agent_two -> research_agent_three,
    compiled, and run on a fixed task string.

    Returns:
        The value returned by `workflow.run`.
    """
    pipeline = GraphWorkflow(name="Basic Example", verbose=True)

    # Register every module-level agent as a node.
    for node_agent in (agent_one, agent_two, agent_three):
        pipeline.add_node(node_agent)

    # Link the nodes into a simple chain, addressing them by agent name.
    chain = (
        ("research_agent", "research_agent_two"),
        ("research_agent_two", "research_agent_three"),
    )
    for upstream, downstream in chain:
        pipeline.add_edge(upstream, downstream)

    pipeline.compile()

    return pipeline.run("Complete a simple task")


if __name__ == "__main__":
    main()
|
@ -0,0 +1,27 @@
|
||||
from swarms.sims.senator_assembly import SenatorAssembly
|
||||
|
||||
|
||||
def main():
    """
    Simulate a Senate vote on a bill proposing broad federal income tax cuts.

    Creates a SenatorAssembly configured with the
    "claude-sonnet-4-20250514" model name and hands the bill text to
    `simulate_vote_concurrent` with `batch_size=10`.
    """
    tax_cut_bill = (
        "A bill proposing a significant reduction in federal income tax rates for all American citizens. "
        "The legislation aims to lower tax brackets across the board, increase the standard deduction, "
        "and provide additional tax relief for middle- and lower-income families. Proponents argue that "
        "the bill will stimulate economic growth, increase disposable income, and enhance consumer spending. "
        "Opponents raise concerns about the potential impact on the federal deficit, funding for public services, "
        "and long-term fiscal responsibility. Senators must weigh the economic, social, and budgetary implications "
        "before casting their votes."
    )

    assembly = SenatorAssembly(model_name="claude-sonnet-4-20250514")
    assembly.simulate_vote_concurrent(tax_cut_bill, batch_size=10)


if __name__ == "__main__":
    main()
|
@ -1,306 +0,0 @@
|
||||
import json
|
||||
from typing import Any, List
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from swarms import Agent
|
||||
|
||||
|
||||
class AgentOutput(BaseModel):
    """
    Record of one agent run: which agent, what input, what result, plus metadata.

    Every field is required (each is declared with `...` as its default).
    """

    agent_name: str = Field(default=..., description="Name of the agent.")
    input_query: str = Field(
        default=...,
        description="Input query provided to the agent.",
    )
    output_result: Any = Field(
        default=...,
        description="Result produced by the agent.",
    )
    metadata: dict = Field(
        default=...,
        description="Additional metadata about the agent run.",
    )
|
||||
|
||||
|
||||
class MatrixSwarm:
    """
    A matrix (2D list) of agents with operations loosely modeled on linear algebra.

    Attributes:
        agents (List[List[Agent]]): The agent matrix, stored row by row.
        outputs (List[AgentOutput]): Results recorded by `multiply` and
            written out by `save_to_file`.
    """

    def __init__(self, agents: List[List[Agent]]):
        """
        Initializes the MatrixSwarm with a 2D list of agents.

        Args:
            agents (List[List[Agent]]): 2D list of agents representing the matrix.

        Raises:
            ValueError: If `agents` is empty, contains a row that is not a
                list, or contains an element that is not an `Agent`.
        """
        if not agents or not all(isinstance(row, list) for row in agents):
            raise ValueError("Agents must be provided as a 2D list.")
        if not all(
            isinstance(agent, Agent) for row in agents for agent in row
        ):
            raise ValueError(
                "All elements of the matrix must be instances of `Agent`."
            )
        self.agents = agents
        self.outputs = []  # AgentOutput records appended by multiply()

    def validate_dimensions(self, other: "MatrixSwarm") -> None:
        """
        Validates that two matrices have compatible dimensions for operations.

        Only the row count and the length of the first row are compared.

        Args:
            other (MatrixSwarm): Another MatrixSwarm.

        Raises:
            ValueError: If dimensions are incompatible.
        """
        same_rows = len(self.agents) == len(other.agents)
        same_cols = len(self.agents[0]) == len(other.agents[0])
        if not (same_rows and same_cols):
            raise ValueError(
                "Matrix dimensions are incompatible for this operation."
            )

    def transpose(self) -> "MatrixSwarm":
        """
        Transposes the matrix of agents (swap rows and columns).

        Returns:
            MatrixSwarm: A new transposed MatrixSwarm.
        """
        transposed_agents = [
            [row[col_idx] for row in self.agents]
            for col_idx in range(len(self.agents[0]))
        ]
        return MatrixSwarm(transposed_agents)

    def add(self, other: "MatrixSwarm") -> "MatrixSwarm":
        """
        Adds two matrices element-wise.

        NOTE(review): `other` is only used for the dimension check; the
        result contains this matrix's agents. Confirm whether the agents of
        `other` are meant to be combined in some way.

        Args:
            other (MatrixSwarm): Another MatrixSwarm to add.

        Returns:
            MatrixSwarm: A new MatrixSwarm holding a row-by-row copy of this
            matrix's agents.

        Raises:
            ValueError: If the dimensions are incompatible.
        """
        self.validate_dimensions(other)
        return MatrixSwarm([list(row) for row in self.agents])

    def scalar_multiply(self, scalar: int) -> "MatrixSwarm":
        """
        Scales the agents by duplicating them scalar times along the row.

        Each agent is repeated consecutively, so a row `[a, b]` scaled by 3
        becomes `[a, a, a, b, b, b]`.

        Args:
            scalar (int): The scalar multiplier.

        Returns:
            MatrixSwarm: A new MatrixSwarm where each agent is repeated scalar times along the row.
        """
        # The agent loop must be the outer one: iterating range(scalar) first
        # would tile the whole row ([a, b, a, b, ...]) instead of repeating
        # each agent in place.
        scaled_agents = [
            [agent for agent in row for _ in range(scalar)]
            for row in self.agents
        ]
        return MatrixSwarm(scaled_agents)

    def multiply(
        self, other: "MatrixSwarm", inputs: List[str]
    ) -> List[List[AgentOutput]]:
        """
        Multiplies two matrices (dot product between rows and columns).

        For every (row, column) pair, each agent of this matrix's row is run
        on the row's query and the results are joined with spaces. The
        resulting AgentOutput records are returned and also appended to
        `self.outputs` so that `save_to_file` can persist them.

        Args:
            other (MatrixSwarm): Another MatrixSwarm for multiplication.
            inputs (List[str]): A list of input queries for the agents, one per row.

        Returns:
            List[List[AgentOutput]]: A resulting matrix of outputs after multiplication.

        Raises:
            ValueError: If the inner dimensions do not match, or fewer
                queries than rows are supplied.
        """
        if len(self.agents[0]) != len(other.agents):
            raise ValueError(
                "Matrix dimensions are incompatible for multiplication."
            )
        if len(inputs) < len(self.agents):
            # Fail before any agent is run instead of with an IndexError
            # part-way through the computation.
            raise ValueError(
                "One input query is required for each row of the matrix."
            )

        # The columns of `other` do not depend on the current row; build once.
        columns = [
            [other_row[col_idx] for other_row in other.agents]
            for col_idx in range(len(other.agents[0]))
        ]

        results = []
        for i, row in enumerate(self.agents):
            query = inputs[i]  # Input query for the corresponding row
            row_results = []
            for col_idx, col in enumerate(columns):
                intermediate_result = []

                # Only the row agent is executed; the column is used for
                # pairing, which limits the runs to the shared length.
                for agent_r, _agent_c in zip(row, col):
                    try:
                        # str() keeps the join below safe when an agent
                        # returns a non-string result.
                        intermediate_result.append(str(agent_r.run(query)))
                    except Exception as e:
                        intermediate_result.append(f"Error: {e}")

                # Aggregate outputs from dot product (example aggregation)
                combined_result = " ".join(intermediate_result)
                row_results.append(
                    AgentOutput(
                        agent_name=f"DotProduct-{i}-{col_idx}",
                        input_query=query,
                        output_result=combined_result,
                        metadata={"row": i, "col": col_idx},
                    )
                )
            self.outputs.extend(row_results)
            results.append(row_results)
        return results

    def subtract(self, other: "MatrixSwarm") -> "MatrixSwarm":
        """
        Subtracts two matrices element-wise.

        NOTE(review): `other` is only used for the dimension check; the
        result contains this matrix's agents. Confirm the intended semantics.

        Args:
            other (MatrixSwarm): Another MatrixSwarm to subtract.

        Returns:
            MatrixSwarm: A new MatrixSwarm holding a row-by-row copy of this
            matrix's agents.

        Raises:
            ValueError: If the dimensions are incompatible.
        """
        self.validate_dimensions(other)
        return MatrixSwarm([list(row) for row in self.agents])

    def identity(self, size: int) -> "MatrixSwarm":
        """
        Creates an identity matrix of agents with size `size`.

        Diagonal cells reuse this matrix's agents at the same position; every
        other cell gets a new agent named "Zero-Agent-<i>-<j>" with an empty
        system prompt. Indexing fails if this matrix has fewer than `size`
        rows or columns.

        Args:
            size (int): Size of the identity matrix (NxN).

        Returns:
            MatrixSwarm: An identity MatrixSwarm.
        """
        identity_agents = [
            [
                (
                    self.agents[i][j]
                    if i == j
                    else Agent(
                        agent_name=f"Zero-Agent-{i}-{j}",
                        system_prompt="",
                    )
                )
                for j in range(size)
            ]
            for i in range(size)
        ]
        return MatrixSwarm(identity_agents)

    def determinant(self) -> Any:
        """
        Computes the determinant of a square MatrixSwarm.

        Placeholder logic: a cofactor expansion along the first row in which
        each agent's `run("Compute determinant")` result is used as the cell
        value.

        NOTE(review): the arithmetic below presumably requires `run` to
        return numbers; confirm against the Agent implementation.

        Returns:
            Any: Determinant of the matrix (as agent outputs).

        Raises:
            ValueError: If the matrix is not square.
        """
        if len(self.agents) != len(self.agents[0]):
            raise ValueError(
                "Determinant can only be computed for square matrices."
            )

        # Recursive determinant calculation (example using placeholder logic)
        if len(self.agents) == 1:
            return self.agents[0][0].run("Compute determinant")

        det_result = 0
        for i in range(len(self.agents)):
            # Minor: drop the first row and column i.
            submatrix = MatrixSwarm(
                [row[:i] + row[i + 1 :] for row in self.agents[1:]]
            )
            cofactor = ((-1) ** i) * self.agents[0][i].run(
                "Compute determinant"
            )
            det_result += cofactor * submatrix.determinant()
        return det_result

    def save_to_file(self, path: str) -> None:
        """
        Saves the agent matrix structure and metadata to a file.

        Writes JSON with the agent names (row by row) and the recorded
        outputs. Errors are logged rather than raised.

        Args:
            path (str): File path to save the matrix.
        """
        try:
            matrix_data = {
                "agents": [
                    [agent.agent_name for agent in row]
                    for row in self.agents
                ],
                "outputs": [output.dict() for output in self.outputs],
            }
            with open(path, "w") as f:
                json.dump(matrix_data, f, indent=4)
            logger.info(f"MatrixSwarm saved to {path}")
        except Exception as e:
            logger.error(f"Error saving MatrixSwarm: {e}")
|
||||
|
||||
|
||||
# # Example usage
|
||||
# if __name__ == "__main__":
|
||||
# from swarms.prompts.finance_agent_sys_prompt import (
|
||||
# FINANCIAL_AGENT_SYS_PROMPT,
|
||||
# )
|
||||
|
||||
# # Create a 3x3 matrix of agents
|
||||
# agents = [
|
||||
# [
|
||||
# Agent(
|
||||
# agent_name=f"Agent-{i}-{j}",
|
||||
# system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
|
||||
# model_name="gpt-4o-mini",
|
||||
# max_loops=1,
|
||||
# autosave=True,
|
||||
# dashboard=False,
|
||||
# verbose=True,
|
||||
# dynamic_temperature_enabled=True,
|
||||
# saved_state_path=f"agent_{i}_{j}.json",
|
||||
# user_name="swarms_corp",
|
||||
# retry_attempts=1,
|
||||
# context_length=200000,
|
||||
# return_step_meta=False,
|
||||
# output_type="string",
|
||||
# streaming_on=False,
|
||||
# )
|
||||
# for j in range(3)
|
||||
# ]
|
||||
# for i in range(3)
|
||||
# ]
|
||||
|
||||
# # Initialize the matrix
|
||||
# agent_matrix = MatrixSwarm(agents)
|
||||
|
||||
# # Example queries
|
||||
# inputs = [
|
||||
# "Explain Roth IRA benefits",
|
||||
# "Differences between ETFs and mutual funds",
|
||||
# "How to create a diversified portfolio",
|
||||
# ]
|
||||
|
||||
# # Run agents
|
||||
# outputs = agent_matrix.multiply(agent_matrix.transpose(), inputs)
|
||||
|
||||
# # Save results
|
||||
# agent_matrix.save_to_file("agent_matrix_results.json")
|
@ -1,326 +0,0 @@
|
||||
import math
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
from datasets import Dataset, load_dataset
|
||||
from loguru import logger
|
||||
from tqdm import tqdm
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Logging configuration: log to console and file (rotating by size)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Swarm interface example
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Benchmark configuration
|
||||
# -----------------------------------------------------------------------------
|
||||
class BenchmarkConfig:
    """
    Describes how to read tasks and reference answers from a benchmark dataset.

    Attributes:
        input_column (str): Name of the column holding the task prompt.
        answer_column (str): Name of the column holding the expected answer.
        answer_extractor (Optional[Callable[[Any], str]]): Optional function
            that turns the dataset's raw answer value into a string.
        answer_matcher (Optional[Callable[[str, str], bool]]): Optional
            function comparing the expected answer with the swarm output.
            When None, a simple substring containment check is used.
    """

    def __init__(
        self,
        input_column: str,
        answer_column: str,
        answer_extractor: Optional[Callable[[Any], str]] = None,
        answer_matcher: Optional[Callable[[str, str], bool]] = None,
    ):
        # Column names first, then the optional hooks.
        self.input_column, self.answer_column = input_column, answer_column
        self.answer_extractor, self.answer_matcher = (
            answer_extractor,
            answer_matcher,
        )
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Preset dataset configurations for popular benchmarks
|
||||
# -----------------------------------------------------------------------------
|
||||
# Maps a dataset name to the columns (and optional answer extractor) used to
# evaluate it.
PRESET_DATASETS: Dict[str, BenchmarkConfig] = {
    "gsm8k": BenchmarkConfig(
        input_column="question",
        answer_column="answer",
        # GSM8K reference answers contain the worked solution followed by
        # "#### <final answer>". Keep only the final answer so the default
        # substring matcher compares against it rather than against the whole
        # rationale. Text without the marker is returned unchanged (stripped).
        answer_extractor=lambda ans: str(ans).split("####")[-1].strip(),
    ),
    "squad": BenchmarkConfig(
        input_column="question",
        answer_column="answers",
        # Use the first entry of a non-empty "text" list when the answer is a
        # dict shaped that way; otherwise fall back to str(ans).
        answer_extractor=lambda ans: (
            ans["text"][0]
            if isinstance(ans, dict)
            and "text" in ans
            and isinstance(ans["text"], list)
            and ans["text"]
            else str(ans)
        ),
    ),
    "winogrande": BenchmarkConfig(
        input_column="sentence",
        answer_column="answer",
    ),
    "commonsense_qa": BenchmarkConfig(
        input_column="question",
        answer_column="answerKey",
    ),
    # Add additional presets here.
}
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# SwarmEvaluator with extended features
|
||||
# -----------------------------------------------------------------------------
|
||||
class SwarmEvaluator:
    """
    Evaluator that uses a swarm of agents to process benchmark datasets
    from Hugging Face, with concurrency, retries, progress display, performance timing,
    and customizable answer matching.

    Example:
        swarm = Swarm()
        evaluator = SwarmEvaluator(swarm)
        results = evaluator.evaluate("gsm8k", split="test", max_workers=4)
        print(results)
    """

    def __init__(self, swarm: Any) -> None:
        """
        Initialize the evaluator with a given swarm.

        Args:
            swarm: A swarm instance exposing a `run(task: str)` method.
        """
        self.swarm = swarm

    def evaluate(
        self,
        dataset_name: str,
        split: str = "test",
        config: Optional["BenchmarkConfig"] = None,
        max_workers: int = 1,
        max_retries: int = 3,
        show_progress: bool = True,
        output_file: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Evaluate the specified benchmark dataset using the swarm.

        Args:
            dataset_name (str): The dataset name (from Hugging Face).
            split (str): The dataset split (e.g., "test", "validation").
            config (Optional[BenchmarkConfig]): Benchmark configuration. If None,
                a preset config is used.
            max_workers (int): Number of concurrent workers.
            max_retries (int): Number of retries for swarm tasks on failure.
            show_progress (bool): If True, display a progress bar that
                advances as examples finish.
            output_file (Optional[str]): Path to a file to write the results.

        Returns:
            Dict[str, Any]: Evaluation metrics including total examples, correct answers,
            accuracy, and total evaluation time.

        Raises:
            ValueError: If `config` is None and no preset exists for
                `dataset_name`.
        """
        if config is None:
            config = PRESET_DATASETS.get(dataset_name)
            if config is None:
                raise ValueError(
                    f"No preset config for dataset '{dataset_name}'. Provide a BenchmarkConfig."
                )

        logger.info(
            f"Loading dataset '{dataset_name}' (split: {split})..."
        )
        dataset: Dataset = load_dataset(dataset_name, split=split)
        total_examples = len(dataset)
        logger.info(f"Total examples to evaluate: {total_examples}")

        start_time = time.time()
        correct = 0

        # Function to process a single example.
        def _process_example(
            example: Dict[str, Any], idx: int
        ) -> Tuple[bool, float]:
            task_start = time.time()
            task_text = example.get(config.input_column)
            expected_answer = example.get(config.answer_column)

            if task_text is None or expected_answer is None:
                logger.warning(
                    f"Example {idx}: Missing '{config.input_column}' or '{config.answer_column}', skipping."
                )
                return (False, 0.0)

            # Use answer_extractor if provided.
            if config.answer_extractor:
                try:
                    expected_answer = config.answer_extractor(
                        expected_answer
                    )
                except Exception as e:
                    logger.error(
                        f"Example {idx}: Error extracting answer: {e}"
                    )
                    return (False, 0.0)

            logger.debug(f"Example {idx} - Task: {task_text}")
            logger.debug(
                f"Example {idx} - Expected Answer: {expected_answer}"
            )

            try:
                swarm_output = self._run_with_retry(
                    task_text, max_retries
                )
            except Exception as e:
                logger.error(
                    f"Example {idx}: Failed after retries. Error: {e}"
                )
                return (False, time.time() - task_start)

            logger.debug(
                f"Example {idx} - Swarm Output: {swarm_output}"
            )

            # Use custom matcher if provided; otherwise, default matching.
            if config.answer_matcher:
                is_correct = config.answer_matcher(
                    expected_answer, swarm_output
                )
            else:
                is_correct = self._default_matcher(
                    expected_answer, swarm_output
                )

            task_time = time.time() - task_start
            logger.info(
                f"Example {idx}: {'Correct' if is_correct else 'Incorrect'} in {task_time:.2f}s"
            )
            return (is_correct, task_time)

        # Use ThreadPoolExecutor for concurrency.
        total_time = 0.0
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(_process_example, example, idx)
                for idx, example in enumerate(dataset, start=1)
            ]

            # Wrap the completion iterator (not the submission loop) so the
            # bar reflects finished examples; submission itself is
            # near-instant and would fill the bar immediately.
            completed = as_completed(futures)
            if show_progress:
                completed = tqdm(
                    completed,
                    total=total_examples,
                    desc="Evaluating",
                )

            for future in completed:
                try:
                    is_correct, elapsed = future.result()
                    total_time += elapsed
                    if is_correct:
                        correct += 1
                except Exception as e:
                    logger.error(f"Error processing an example: {e}")

        overall_time = time.time() - start_time
        accuracy = (
            correct / total_examples if total_examples > 0 else 0.0
        )

        logger.info(
            f"Evaluation complete. Total examples: {total_examples}, Correct: {correct}, "
            f"Accuracy: {accuracy:.2%}, Overall Time: {overall_time:.2f}s, "
            f"Average per-example time: {total_time/total_examples if total_examples else 0:.2f}s"
        )

        results = {
            "total": total_examples,
            "correct": correct,
            "accuracy": accuracy,
            "overall_time": overall_time,
            "average_example_time": (
                total_time / total_examples
                if total_examples
                else math.nan
            ),
        }

        # Optionally save results to a file (best effort: failures are logged).
        if output_file:
            try:
                with open(output_file, "w", encoding="utf-8") as f:
                    for key, value in results.items():
                        f.write(f"{key}: {value}\n")
                logger.info(f"Results saved to {output_file}")
            except Exception as e:
                logger.error(
                    f"Error saving results to {output_file}: {e}"
                )

        return results

    def _run_with_retry(self, task: str, max_retries: int) -> str:
        """
        Runs the swarm task with a retry mechanism.

        The task is attempted up to `max_retries + 1` times. Between failed
        attempts the method sleeps `0.5 * attempt` seconds (linear backoff);
        no sleep follows the final failure.

        Args:
            task (str): The task string.
            max_retries (int): Maximum number of retries.

        Returns:
            str: Swarm output.

        Raises:
            Exception: If all retries fail. The last underlying error is
                attached as the cause.
        """
        last_error = None
        attempt = 0
        while attempt <= max_retries:
            try:
                start = time.time()
                result = self.swarm.run(task)
                elapsed = time.time() - start
                logger.debug(
                    f"Task succeeded in {elapsed:.2f}s on attempt {attempt + 1}"
                )
                return result
            except Exception as e:
                last_error = e
                logger.warning(
                    f"Task failed on attempt {attempt + 1}: {e}"
                )
                attempt += 1
                # Only wait when another attempt will actually follow.
                if attempt <= max_retries:
                    time.sleep(0.5 * attempt)  # Linear backoff
        raise Exception("Max retries exceeded for task.") from last_error

    @staticmethod
    def _default_matcher(expected: Any, output: Any) -> bool:
        """
        Default answer matching using a normalized substring check.

        Both values are converted with `str()` first, so non-string dataset
        answers or swarm outputs do not raise. Runs of whitespace are
        collapsed to single spaces before comparing.

        Args:
            expected: The expected answer.
            output: The swarm output.

        Returns:
            bool: True if expected is found in output; otherwise, False.
        """
        expected_norm = " ".join(str(expected).split())
        output_norm = " ".join(str(output).split())
        return expected_norm in output_norm
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Example usage
|
||||
# -----------------------------------------------------------------------------
|
@ -1,216 +0,0 @@
|
||||
from swarms.structs.matrix_swarm import AgentMatrix, AgentOutput
|
||||
from swarms import Agent
|
||||
|
||||
|
||||
def create_test_matrix(rows: int, cols: int) -> AgentMatrix:
    """Build a rows x cols AgentMatrix of agents named TestAgent-<row>-<col>."""
    grid = []
    for r in range(rows):
        grid_row = [
            Agent(
                agent_name=f"TestAgent-{r}-{c}",
                system_prompt="Test prompt",
            )
            for c in range(cols)
        ]
        grid.append(grid_row)
    return AgentMatrix(grid)
|
||||
|
||||
|
||||
def test_init():
    """Check that AgentMatrix accepts a valid grid and rejects invalid ones."""
    # A well-formed 2x2 matrix is accepted and keeps its shape.
    grid = create_test_matrix(2, 2)
    assert isinstance(grid, AgentMatrix)
    assert len(grid.agents) == 2
    assert len(grid.agents[0]) == 2

    # Non-agent elements, then an empty matrix: both must raise ValueError.
    for bad_input in ([[1, 2], [3, 4]], []):
        try:
            AgentMatrix(bad_input)
        except ValueError:
            continue
        assert False, "Should raise ValueError"
|
||||
|
||||
|
||||
def test_transpose():
    """Check that transposing a 2x3 matrix swaps its dimensions and positions."""
    source = create_test_matrix(2, 3)
    flipped = source.transpose()

    # The 3 original columns become rows, the 2 original rows become columns.
    assert len(flipped.agents) == 3
    assert len(flipped.agents[0]) == 2

    # The agent at (r, c) must show up at (c, r) after the transpose.
    for r in range(2):
        for c in range(3):
            before = source.agents[r][c].agent_name
            after = flipped.agents[c][r].agent_name
            assert before == after
|
||||
|
||||
|
||||
def test_add():
    """Check matrix addition for matching and mismatched dimensions."""
    left = create_test_matrix(2, 2)
    right = create_test_matrix(2, 2)

    total = left.add(right)
    assert len(total.agents) == 2
    assert len(total.agents[0]) == 2

    # Adding a 2x3 matrix to a 2x2 matrix must be rejected.
    mismatched = create_test_matrix(2, 3)
    try:
        left.add(mismatched)
    except ValueError:
        pass
    else:
        assert False, "Should raise ValueError"
|
||||
|
||||
|
||||
def test_scalar_multiply():
    """Check that scalar multiplication repeats every agent ``scalar`` times per row."""
    scalar = 3
    base = create_test_matrix(2, 2)
    scaled = base.scalar_multiply(scalar)

    assert len(scaled.agents) == 2
    assert len(scaled.agents[0]) == 2 * scalar

    # Each run of `scalar` consecutive columns must carry the name of the
    # single original agent it was expanded from.
    width = len(scaled.agents[0])
    for row_idx in range(len(scaled.agents)):
        for col_idx in range(width):
            source_agent = base.agents[row_idx][col_idx // scalar]
            assert (
                scaled.agents[row_idx][col_idx].agent_name
                == source_agent.agent_name
            )
|
||||
|
||||
|
||||
def test_multiply():
    """Check the shape and element types of a 2x3 by 3x2 matrix multiplication."""
    left = create_test_matrix(2, 3)
    right = create_test_matrix(3, 2)
    queries = ["test query 1", "test query 2"]

    product = left.multiply(right, queries)

    # One result row per row of the left matrix, one cell per column of the right.
    assert len(product) == 2
    assert len(product[0]) == 2

    # Every cell must be an AgentOutput with a string query and dict metadata.
    for cell in (item for product_row in product for item in product_row):
        assert isinstance(cell, AgentOutput)
        assert isinstance(cell.input_query, str)
        assert isinstance(cell.metadata, dict)
|
||||
|
||||
|
||||
def test_subtract():
    """Check that subtracting two 2x2 matrices yields a 2x2 result."""
    minuend, subtrahend = (
        create_test_matrix(2, 2),
        create_test_matrix(2, 2),
    )

    difference = minuend.subtract(subtrahend)

    # Row count is checked before the first row is indexed.
    assert len(difference.agents) == 2
    assert len(difference.agents[0]) == 2
|
||||
|
||||
|
||||
def test_identity():
    """Check the 3x3 identity matrix derived from a 3x3 agent matrix."""
    size = 3
    base = create_test_matrix(3, 3)
    ident = base.identity(3)

    assert len(ident.agents) == 3
    assert len(ident.agents[0]) == 3

    for i in range(size):
        # The diagonal entry keeps the name of the original agent.
        diagonal_name = ident.agents[i][i].agent_name
        assert diagonal_name == base.agents[i][i].agent_name

        # Every other entry in this row must be a placeholder zero agent.
        off_diagonal = (j for j in range(size) if j != i)
        for j in off_diagonal:
            assert ident.agents[i][j].agent_name.startswith(
                "Zero-Agent"
            )
|
||||
|
||||
|
||||
def test_determinant():
    """Check determinant() on square matrices and its rejection of non-square ones."""
    # Square cases, 1x1 then 2x2: a result must be produced.
    for side in (1, 2):
        square = create_test_matrix(side, side)
        value = square.determinant()
        assert value is not None

    # A 2x3 matrix has no determinant and must raise ValueError.
    rectangular = create_test_matrix(2, 3)
    try:
        rectangular.determinant()
    except ValueError:
        pass
    else:
        assert False, "Should raise ValueError"
|
||||
|
||||
|
||||
def test_save_to_file(tmp_path):
    """Check that save_to_file writes a JSON file describing a 2x2 matrix.

    Args:
        tmp_path: Directory in which the output file is created.
    """
    import json
    import os

    target = os.path.join(tmp_path, "test_matrix.json")
    grid = create_test_matrix(2, 2)

    grid.save_to_file(target)
    assert os.path.exists(target)

    # Load the file back and inspect its top-level structure.
    with open(target, "r") as handle:
        payload = json.load(handle)

    assert "agents" in payload
    assert "outputs" in payload
    saved_agents = payload["agents"]
    assert len(saved_agents) == 2
    assert len(saved_agents[0]) == 2
|
||||
|
||||
|
||||
def run_all_tests():
    """
    Run every argument-free test in this module and print one result line each.

    ``test_save_to_file`` is not in the list: it takes a ``tmp_path``
    argument that this plain runner does not supply.

    Returns:
        int: Number of tests that failed (0 when all of them passed).
    """
    test_functions = [
        test_init,
        test_transpose,
        test_add,
        test_scalar_multiply,
        test_multiply,
        test_subtract,
        test_identity,
        test_determinant,
    ]

    failures = 0
    for test_func in test_functions:
        try:
            test_func()
        except AssertionError as e:
            failures += 1
            print(f"❌ {test_func.__name__} failed: {str(e)}")
        except Exception as e:
            # Deliberately broad: a crashing test must not abort the
            # remaining tests, but it still counts as a failure.
            failures += 1
            print(
                f"❌ {test_func.__name__} failed with exception: {str(e)}"
            )
        else:
            print(f"✅ {test_func.__name__} passed")

    return failures


if __name__ == "__main__":
    # Exit non-zero when any test failed so shells and CI can detect it.
    raise SystemExit(1 if run_all_tests() else 0)
|
Loading…
Reference in new issue