"""
|
|
GraphWorkflow Real-World Examples and Benchmarks
|
|
|
|
This file contains comprehensive real-world examples demonstrating GraphWorkflow's
|
|
capabilities across different domains. Each example serves as a benchmark and
|
|
showcases specific features and use cases.
|
|
"""

import asyncio
import time
import json
import os
import sys
import requests
from typing import Dict, Any, List
from datetime import datetime

# Add the parent directory to the path so we can import from swarms
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from swarms.structs.graph_workflow import (
    GraphWorkflow, Node, Edge, NodeType, EdgeType, GraphEngine
)

# Check for API key in environment variables
if not os.getenv("SWARMS_API_KEY"):
    print("⚠️ Warning: SWARMS_API_KEY environment variable not set.")
    print("   Please set your API key: export SWARMS_API_KEY='your-api-key-here'")
    print("   Or set it in your environment variables.")

# API Configuration
API_KEY = os.getenv("SWARMS_API_KEY", "your-api-key-here")
BASE_URL = "https://api.swarms.world"

headers = {
    "x-api-key": API_KEY,
    "Content-Type": "application/json"
}


class MockAgent:
    """Mock agent for testing without API calls."""

    def __init__(self, agent_name: str, system_prompt: str):
        self.agent_name = agent_name
        self.system_prompt = system_prompt

    async def run(self, task: str, **kwargs):
        """Mock agent execution."""
        # Simulate some processing time
        await asyncio.sleep(0.1)
        return f"Mock response from {self.agent_name}: {task[:50]}..."

    def arun(self, task: str, **kwargs):
        """Return the run() coroutine, for compatibility with async agent APIs."""
        return self.run(task, **kwargs)
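
# To exercise these workflows against live models instead of mocks, you could
# swap MockAgent for a real swarms agent. A minimal sketch, assuming the
# top-level `swarms.Agent` class accepts these constructor arguments:
#
#   from swarms import Agent  # assumption: available in your swarms install
#   code_generator = Agent(
#       agent_name="CodeGenerator",
#       system_prompt="You are an expert Python developer.",
#   )
#
# Everything below only needs an object exposing run()/arun(task), so any
# compatible agent works.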


class GraphWorkflowBenchmarks:
    """Collection of real-world GraphWorkflow examples and benchmarks."""

    def __init__(self):
        """Initialize benchmark examples."""
        self.results = {}
        self.start_time = None

    def start_benchmark(self, name: str):
        """Start timing a benchmark."""
        self.start_time = time.time()
        print(f"\n🚀 Starting benchmark: {name}")

    def end_benchmark(self, name: str, result: Dict[str, Any]):
        """End timing a benchmark and store results."""
        if self.start_time:
            duration = time.time() - self.start_time
            result['duration'] = duration
            result['timestamp'] = datetime.now().isoformat()
            self.results[name] = result
            print(f"✅ Completed {name} in {duration:.2f}s")
            self.start_time = None
        return result
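
    # Every benchmark below follows the same skeleton. A minimal sketch using
    # only the API imported above (node ids and task strings are illustrative):
    #
    #   wf = GraphWorkflow(name="mini", max_loops=1)
    #   wf.add_node(Node(id="step", type=NodeType.TASK,
    #                    callable=lambda **kw: "done", output_keys=["out"]))
    #   wf.set_entry_points(["step"])
    #   wf.set_end_points(["step"])
    #   result = await wf.run("describe the task here")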

    async def benchmark_software_development_pipeline(self):
        """Benchmark: Software Development Pipeline with Code Generation, Testing, and Deployment."""
        self.start_benchmark("Software Development Pipeline")

        # Create mock agents (no API calls needed)
        code_generator = MockAgent(
            agent_name="CodeGenerator",
            system_prompt="You are an expert Python developer. Generate clean, well-documented code."
        )

        code_reviewer = MockAgent(
            agent_name="CodeReviewer",
            system_prompt="You are a senior code reviewer. Check for bugs, security issues, and best practices."
        )

        test_generator = MockAgent(
            agent_name="TestGenerator",
            system_prompt="You are a QA engineer. Generate comprehensive unit tests for the given code."
        )

        # Create workflow
        workflow = GraphWorkflow(
            name="Software Development Pipeline",
            description="Complete software development pipeline from code generation to deployment",
            max_loops=1,
            timeout=600.0,
            show_dashboard=True,
            auto_save=True,
            graph_engine=GraphEngine.NETWORKX
        )

        # Define processing functions
        def validate_code(**kwargs):
            """Validate generated code meets requirements."""
            code = kwargs.get('generated_code', '')
            return len(code) > 100 and 'def ' in code

        def run_tests(**kwargs):
            """Simulate running tests."""
            tests = kwargs.get('test_code', '')
            # Simulate test execution
            return f"Tests executed: {len(tests.split('def test_')) - 1} tests passed"

        def deploy_code(**kwargs):
            """Simulate code deployment."""
            code = kwargs.get('generated_code', '')
            tests = kwargs.get('test_results', '')
            return f"Deployed code ({len(code)} chars) with {tests}"

        # Create nodes
        nodes = [
            Node(
                id="code_generation",
                type=NodeType.AGENT,
                agent=code_generator,
                output_keys=["generated_code"],
                timeout=120.0,
                retry_count=2,
                parallel=True,
            ),
            Node(
                id="code_review",
                type=NodeType.AGENT,
                agent=code_reviewer,
                required_inputs=["generated_code"],
                output_keys=["review_comments"],
                timeout=90.0,
                retry_count=1,
            ),
            Node(
                id="validation",
                type=NodeType.TASK,  # Changed from CONDITION to TASK
                callable=validate_code,
                required_inputs=["generated_code"],
                output_keys=["code_valid"],
            ),
            Node(
                id="test_generation",
                type=NodeType.AGENT,
                agent=test_generator,
                required_inputs=["generated_code"],
                output_keys=["test_code"],
                timeout=60.0,
            ),
            Node(
                id="test_execution",
                type=NodeType.TASK,
                callable=run_tests,
                required_inputs=["test_code"],
                output_keys=["test_results"],
            ),
            Node(
                id="deployment",
                type=NodeType.TASK,
                callable=deploy_code,
                required_inputs=["generated_code", "test_results"],
                output_keys=["deployment_status"],
            ),
        ]

        # Add nodes
        for node in nodes:
            workflow.add_node(node)

        # Add edges
        edges = [
            Edge(source="code_generation", target="code_review"),
            Edge(source="code_generation", target="validation"),
            Edge(source="code_generation", target="test_generation"),
            Edge(source="validation", target="deployment"),  # Removed conditional edge type
            Edge(source="test_generation", target="test_execution"),
            Edge(source="test_execution", target="deployment"),
        ]

        for edge in edges:
            workflow.add_edge(edge)

        # Set entry and end points
        workflow.set_entry_points(["code_generation"])
        workflow.set_end_points(["deployment"])

        # Execute workflow
        result = await workflow.run(
            "Create a Python function that implements a binary search algorithm with proper error handling and documentation"
        )

        return self.end_benchmark("Software Development Pipeline", {
            'workflow_type': 'software_development',
            'nodes_count': len(nodes),
            'edges_count': len(edges),
            'result': result,
            'features_used': ['agents', 'task_nodes', 'parallel_execution', 'state_management']
        })

    async def benchmark_data_processing_pipeline(self):
        """Benchmark: ETL Data Processing Pipeline with Validation and Analytics."""
        self.start_benchmark("Data Processing Pipeline")

        # Create workflow
        workflow = GraphWorkflow(
            name="ETL Data Processing Pipeline",
            description="Extract, Transform, Load pipeline with data validation and analytics",
            max_loops=1,
            timeout=300.0,
            show_dashboard=False,
            auto_save=True,
            state_backend="sqlite"
        )

        # Define data processing functions
        def extract_data(**kwargs):
            """Simulate data extraction."""
            # Simulate extracting data from multiple sources
            return {
                "raw_data": [{"id": i, "value": i * 2, "category": "A" if i % 2 == 0 else "B"}
                             for i in range(1, 101)],
                "metadata": {"source": "database", "records": 100, "timestamp": datetime.now().isoformat()}
            }

        def validate_data(**kwargs):
            """Validate data quality."""
            data = kwargs.get('extracted_data', {}).get('raw_data', [])
            valid_records = [record for record in data if record.get('id') and record.get('value')]
            return len(valid_records) >= len(data) * 0.95  # 95% quality threshold

        def transform_data(**kwargs):
            """Transform and clean data."""
            data = kwargs.get('extracted_data', {}).get('raw_data', [])
            transformed = []
            for record in data:
                transformed.append({
                    "id": record["id"],
                    "processed_value": record["value"] * 1.5,
                    "category": record["category"],
                    "processed_at": datetime.now().isoformat()
                })
            return {"transformed_data": transformed, "transformation_stats": {"records_processed": len(transformed)}}

        def analyze_data(**kwargs):
            """Perform data analytics."""
            data = kwargs.get('transformed_data', {}).get('transformed_data', [])
            categories = {}
            total_value = 0

            for record in data:
                category = record["category"]
                value = record["processed_value"]
                categories[category] = categories.get(category, 0) + value
                total_value += value

            return {
                "analytics": {
                    "total_records": len(data),
                    "total_value": total_value,
                    "category_breakdown": categories,
                    "average_value": total_value / len(data) if data else 0
                }
            }

        def load_data(**kwargs):
            """Simulate loading data to destination."""
            analytics = kwargs.get('analytics', {})
            transformed_data = kwargs.get('transformed_data', {})

            return {
                "load_status": "success",
                "records_loaded": transformed_data.get("transformation_stats", {}).get("records_processed", 0),
                "analytics_summary": analytics.get("analytics", {})
            }

        # Create nodes
        nodes = [
            Node(
                id="extract",
                type=NodeType.TASK,
                callable=extract_data,
                output_keys=["extracted_data"],
                timeout=30.0,
            ),
            Node(
                id="validate",
                type=NodeType.TASK,  # Changed from CONDITION to TASK
                callable=validate_data,
                required_inputs=["extracted_data"],
                output_keys=["data_valid"],
            ),
            Node(
                id="transform",
                type=NodeType.TASK,  # Changed from DATA_PROCESSOR to TASK
                callable=transform_data,
                required_inputs=["extracted_data"],
                output_keys=["transformed_data"],
                timeout=45.0,
            ),
            Node(
                id="analyze",
                type=NodeType.TASK,
                callable=analyze_data,
                required_inputs=["transformed_data"],
                output_keys=["analytics"],
                timeout=30.0,
            ),
            Node(
                id="load",
                type=NodeType.TASK,
                callable=load_data,
                required_inputs=["transformed_data", "analytics"],
                output_keys=["load_result"],
                timeout=30.0,
            ),
        ]

        # Add nodes
        for node in nodes:
            workflow.add_node(node)

        # Add edges
        edges = [
            Edge(source="extract", target="validate"),
            Edge(source="extract", target="transform"),
            Edge(source="validate", target="load"),  # Removed conditional edge type
            Edge(source="transform", target="analyze"),
            Edge(source="analyze", target="load"),
        ]

        for edge in edges:
            workflow.add_edge(edge)

        # Set entry and end points
        workflow.set_entry_points(["extract"])
        workflow.set_end_points(["load"])

        # Execute workflow
        result = await workflow.run("Process customer transaction data for monthly analytics")

        return self.end_benchmark("Data Processing Pipeline", {
            'workflow_type': 'data_processing',
            'nodes_count': len(nodes),
            'edges_count': len(edges),
            'result': result,
            'features_used': ['task_nodes', 'state_management', 'checkpointing']
        })

    async def benchmark_ai_ml_workflow(self):
        """Benchmark: AI/ML Model Training and Evaluation Pipeline."""
        self.start_benchmark("AI/ML Workflow")

        # Create mock agents
        data_scientist = MockAgent(
            agent_name="DataScientist",
            system_prompt="You are an expert data scientist. Analyze data and suggest preprocessing steps."
        )

        ml_engineer = MockAgent(
            agent_name="MLEngineer",
            system_prompt="You are an ML engineer. Design and implement machine learning models."
        )

        # Create workflow
        workflow = GraphWorkflow(
            name="AI/ML Model Pipeline",
            description="Complete ML pipeline from data analysis to model deployment",
            max_loops=1,
            timeout=600.0,
            show_dashboard=True,
            auto_save=True,
            state_backend="memory"  # Changed from redis to memory
        )

        # Define ML pipeline functions
        def generate_sample_data(**kwargs):
            """Generate sample ML dataset."""
            import numpy as np
            np.random.seed(42)
            X = np.random.randn(1000, 10)
            y = np.random.randint(0, 2, 1000)
            return {
                "X_train": X[:800].tolist(),
                "X_test": X[800:].tolist(),
                "y_train": y[:800].tolist(),
                "y_test": y[800:].tolist(),
                "feature_names": [f"feature_{i}" for i in range(10)]
            }

        def preprocess_data(**kwargs):
            """Preprocess the data."""
            data = kwargs.get('raw_data', {})
            # Simulate preprocessing
            return {
                "processed_data": data,
                "preprocessing_info": {
                    "scaling_applied": True,
                    "missing_values_handled": False,
                    "feature_engineering": "basic"
                }
            }

        def train_model(**kwargs):
            """Simulate model training."""
            data = kwargs.get('processed_data', {})
            # Simulate training
            return {
                "model_info": {
                    "algorithm": "Random Forest",
                    "accuracy": 0.85,
                    "training_time": 45.2,
                    "hyperparameters": {"n_estimators": 100, "max_depth": 10}
                },
                "model_path": "/models/random_forest_v1.pkl"
            }

        def evaluate_model(**kwargs):
            """Evaluate model performance."""
            model_info = kwargs.get('model_info', {})
            accuracy = model_info.get('accuracy', 0)
            return {
                "evaluation_results": {
                    "accuracy": accuracy,
                    "precision": 0.83,
                    "recall": 0.87,
                    "f1_score": 0.85,
                    "roc_auc": 0.89,
                    # Kept inside evaluation_results so it survives the node's
                    # output_keys=["evaluation_results"] filtering and reaches deploy_model.
                    "model_approved": accuracy > 0.8
                }
            }

        def deploy_model(**kwargs):
            """Simulate model deployment."""
            evaluation = kwargs.get('evaluation_results', {})
            model_info = kwargs.get('model_info', {})

            if evaluation.get('model_approved', False):
                return {
                    "deployment_status": "success",
                    "model_version": "v1.0",
                    "endpoint_url": "https://api.example.com/predict",
                    "performance_metrics": evaluation
                }
            else:
                return {
                    "deployment_status": "rejected",
                    "reason": "Model accuracy below threshold"
                }

        # Create nodes
        nodes = [
            Node(
                id="data_generation",
                type=NodeType.TASK,
                callable=generate_sample_data,
                output_keys=["raw_data"],
                timeout=30.0,
            ),
            Node(
                id="data_analysis",
                type=NodeType.AGENT,
                agent=data_scientist,
                required_inputs=["raw_data"],
                output_keys=["analysis_report"],
                timeout=120.0,
            ),
            Node(
                id="preprocessing",
                type=NodeType.TASK,  # Changed from DATA_PROCESSOR to TASK
                callable=preprocess_data,
                required_inputs=["raw_data"],
                output_keys=["processed_data"],
                timeout=60.0,
            ),
            Node(
                id="model_design",
                type=NodeType.AGENT,
                agent=ml_engineer,
                required_inputs=["analysis_report", "processed_data"],
                output_keys=["model_specification"],
                timeout=90.0,
            ),
            Node(
                id="training",
                type=NodeType.TASK,
                callable=train_model,
                required_inputs=["processed_data"],
                output_keys=["model_info"],
                timeout=180.0,
            ),
            Node(
                id="evaluation",
                type=NodeType.TASK,
                callable=evaluate_model,
                required_inputs=["model_info"],
                output_keys=["evaluation_results"],
                timeout=60.0,
            ),
            Node(
                id="deployment",
                type=NodeType.TASK,
                callable=deploy_model,
                required_inputs=["evaluation_results", "model_info"],
                output_keys=["deployment_result"],
                timeout=30.0,
            ),
        ]

        # Add nodes
        for node in nodes:
            workflow.add_node(node)

        # Add edges
        edges = [
            Edge(source="data_generation", target="data_analysis"),
            Edge(source="data_generation", target="preprocessing"),
            Edge(source="data_analysis", target="model_design"),
            Edge(source="preprocessing", target="model_design"),
            Edge(source="preprocessing", target="training"),
            Edge(source="model_design", target="training"),
            Edge(source="training", target="evaluation"),
            Edge(source="evaluation", target="deployment"),
        ]

        for edge in edges:
            workflow.add_edge(edge)

        # Set entry and end points
        workflow.set_entry_points(["data_generation"])
        workflow.set_end_points(["deployment"])

        # Execute workflow
        result = await workflow.run("Build a machine learning model for customer churn prediction")

        return self.end_benchmark("AI/ML Workflow", {
            'workflow_type': 'ai_ml',
            'nodes_count': len(nodes),
            'edges_count': len(edges),
            'result': result,
            'features_used': ['agents', 'task_nodes', 'parallel_execution', 'state_management']
        })

    async def benchmark_business_process_workflow(self):
        """Benchmark: Business Process Workflow with Approval and Notification."""
        self.start_benchmark("Business Process Workflow")

        # Create mock agents
        analyst = MockAgent(
            agent_name="BusinessAnalyst",
            system_prompt="You are a business analyst. Review proposals and provide recommendations."
        )

        manager = MockAgent(
            agent_name="Manager",
            system_prompt="You are a senior manager. Make approval decisions based on business criteria."
        )

        # Create workflow
        workflow = GraphWorkflow(
            name="Business Approval Process",
            description="Multi-stage business approval workflow with notifications",
            max_loops=1,
            timeout=300.0,
            show_dashboard=False,
            auto_save=True,
            state_backend="file"
        )

        # Define business process functions
        def create_proposal(**kwargs):
            """Create a business proposal."""
            return {
                "proposal_id": "PROP-2024-001",
                "title": "New Product Launch Initiative",
                "budget": 50000,
                "timeline": "6 months",
                "risk_level": "medium",
                "expected_roi": 0.25,
                "created_by": "john.doe@company.com",
                "created_at": datetime.now().isoformat()
            }

        def validate_proposal(**kwargs):
            """Validate proposal completeness."""
            proposal = kwargs.get('proposal', {})
            required_fields = ['title', 'budget', 'timeline', 'expected_roi']
            return all(field in proposal for field in required_fields)

        def analyze_proposal(**kwargs):
            """Analyze proposal feasibility (deterministic alternative to the analyst agent)."""
            proposal = kwargs.get('proposal', {})
            budget = proposal.get('budget', 0)
            roi = proposal.get('expected_roi', 0)

            return {
                "analysis": {
                    "budget_appropriate": budget <= 100000,
                    "roi_acceptable": roi >= 0.15,
                    "risk_assessment": "manageable" if proposal.get('risk_level') != 'high' else "high",
                    "recommendation": "approve" if budget <= 100000 and roi >= 0.15 else "review"
                }
            }

        def check_budget_approval(**kwargs):
            """Check if budget requires higher approval."""
            proposal = kwargs.get('proposal', {})
            budget = proposal.get('budget', 0)
            return budget <= 25000  # Can be approved by manager

        def generate_approval_document(**kwargs):
            """Generate approval documentation."""
            proposal = kwargs.get('proposal', {})
            # NOTE: falls back to "pending" unless a structured 'analysis' dict
            # with a recommendation is present in the inputs.
            analysis = kwargs.get('analysis', {})

            return {
                "approval_doc": {
                    "proposal_id": proposal.get('proposal_id'),
                    "approval_status": "approved" if analysis.get('recommendation') == 'approve' else "pending",
                    "approval_date": datetime.now().isoformat(),
                    "conditions": ["budget_monitoring", "quarterly_review"],
                    "next_steps": ["contract_negotiation", "team_assignment"]
                }
            }

        def send_notifications(**kwargs):
            """Send approval notifications."""
            approval_doc = kwargs.get('approval_doc', {})
            proposal = kwargs.get('proposal', {})

            return {
                "notifications": {
                    "stakeholders_notified": True,
                    "email_sent": True,
                    "slack_notification": True,
                    "recipients": [
                        proposal.get('created_by'),
                        "finance@company.com",
                        "legal@company.com"
                    ]
                }
            }

        # Create nodes
        nodes = [
            Node(
                id="proposal_creation",
                type=NodeType.TASK,
                callable=create_proposal,
                output_keys=["proposal"],
                timeout=30.0,
            ),
            Node(
                id="validation",
                type=NodeType.TASK,  # Changed from CONDITION to TASK
                callable=validate_proposal,
                required_inputs=["proposal"],
                output_keys=["proposal_valid"],
            ),
            Node(
                id="analysis",
                type=NodeType.AGENT,
                agent=analyst,
                required_inputs=["proposal"],
                output_keys=["analysis_report"],
                timeout=90.0,
            ),
            Node(
                id="budget_check",
                type=NodeType.TASK,  # Changed from CONDITION to TASK
                callable=check_budget_approval,
                required_inputs=["proposal"],
                output_keys=["budget_approved"],
            ),
            Node(
                id="manager_review",
                type=NodeType.AGENT,
                agent=manager,
                required_inputs=["proposal", "analysis_report"],
                output_keys=["manager_decision"],
                timeout=60.0,
            ),
            Node(
                id="approval_documentation",
                type=NodeType.TASK,
                callable=generate_approval_document,
                required_inputs=["proposal", "analysis_report"],
                output_keys=["approval_doc"],
                timeout=30.0,
            ),
            Node(
                id="notifications",
                type=NodeType.TASK,
                callable=send_notifications,
                required_inputs=["approval_doc", "proposal"],
                output_keys=["notification_status"],
                timeout=30.0,
            ),
        ]

        # Add nodes
        for node in nodes:
            workflow.add_node(node)

        # Add edges
        edges = [
            Edge(source="proposal_creation", target="validation"),
            Edge(source="validation", target="analysis"),  # Removed conditional edge type
            Edge(source="validation", target="notifications"),  # Removed error edge type
            Edge(source="analysis", target="budget_check"),
            Edge(source="budget_check", target="manager_review"),  # Removed conditional edge type
            Edge(source="analysis", target="approval_documentation"),
            Edge(source="manager_review", target="approval_documentation"),
            Edge(source="approval_documentation", target="notifications"),
        ]

        for edge in edges:
            workflow.add_edge(edge)

        # Set entry and end points
        workflow.set_entry_points(["proposal_creation"])
        workflow.set_end_points(["notifications"])

        # Execute workflow
        result = await workflow.run("Review and approve the new product launch proposal")

        return self.end_benchmark("Business Process Workflow", {
            'workflow_type': 'business_process',
            'nodes_count': len(nodes),
            'edges_count': len(edges),
            'result': result,
            'features_used': ['agents', 'task_nodes', 'state_management']
        })

    async def benchmark_performance_stress_test(self):
        """Benchmark: Performance stress test with many parallel nodes."""
        self.start_benchmark("Performance Stress Test")

        # Create workflow
        workflow = GraphWorkflow(
            name="Performance Stress Test",
            description="Stress test with multiple parallel nodes and complex dependencies",
            max_loops=1,
            timeout=300.0,
            show_dashboard=False,
            auto_save=False,
            graph_engine=GraphEngine.NETWORKX  # Changed from RUSTWORKX to NETWORKX
        )

        # Define stress test functions (time is already imported at module level)
        def parallel_task_1(**kwargs):
            """Simulate a unit of work for task 1."""
            time.sleep(0.1)  # Simulate work
            return {"result_1": "completed", "data_1": list(range(100))}

        def parallel_task_2(**kwargs):
            """Simulate a unit of work for task 2."""
            time.sleep(0.1)  # Simulate work
            return {"result_2": "completed", "data_2": list(range(200))}

        def parallel_task_3(**kwargs):
            """Simulate a unit of work for task 3."""
            time.sleep(0.1)  # Simulate work
            return {"result_3": "completed", "data_3": list(range(300))}

        def parallel_task_4(**kwargs):
            """Simulate a unit of work for task 4."""
            time.sleep(0.1)  # Simulate work
            return {"result_4": "completed", "data_4": list(range(400))}

        def parallel_task_5(**kwargs):
            """Simulate a unit of work for task 5."""
            time.sleep(0.1)  # Simulate work
            return {"result_5": "completed", "data_5": list(range(500))}

        def merge_results(**kwargs):
            """Merge all parallel results."""
            results = []
            for i in range(1, 6):
                result_key = f"result_{i}"
                data_key = f"data_{i}"
                if result_key in kwargs:
                    results.append({
                        "task": f"task_{i}",
                        "status": kwargs[result_key],
                        "data_length": len(kwargs.get(data_key, []))
                    })

            return {
                "merged_results": results,
                "total_tasks": len(results),
                "all_completed": all(r["status"] == "completed" for r in results)
            }

        def final_processing(**kwargs):
            """Final processing step."""
            merged = kwargs.get('merged_results', {})
            if isinstance(merged, list):
                # Handle case where merged_results is a list
                all_completed = all(r.get("status") == "completed" for r in merged)
                total_tasks = len(merged)
            else:
                # Handle case where merged_results is a dict
                all_completed = merged.get('all_completed', False)
                total_tasks = merged.get('total_tasks', 0)

            return {
                "final_result": {
                    "success": all_completed,
                    "total_tasks_processed": total_tasks,
                    "processing_time": time.time(),
                    "performance_metrics": {
                        "parallel_efficiency": 0.95,
                        "throughput": "high"
                    }
                }
            }

        # Create nodes
        nodes = [
            Node(
                id="task_1",
                type=NodeType.TASK,
                callable=parallel_task_1,
                output_keys=["result_1", "data_1"],
                timeout=30.0,
                parallel=True,
            ),
            Node(
                id="task_2",
                type=NodeType.TASK,
                callable=parallel_task_2,
                output_keys=["result_2", "data_2"],
                timeout=30.0,
                parallel=True,
            ),
            Node(
                id="task_3",
                type=NodeType.TASK,
                callable=parallel_task_3,
                output_keys=["result_3", "data_3"],
                timeout=30.0,
                parallel=True,
            ),
            Node(
                id="task_4",
                type=NodeType.TASK,
                callable=parallel_task_4,
                output_keys=["result_4", "data_4"],
                timeout=30.0,
                parallel=True,
            ),
            Node(
                id="task_5",
                type=NodeType.TASK,
                callable=parallel_task_5,
                output_keys=["result_5", "data_5"],
                timeout=30.0,
                parallel=True,
            ),
            Node(
                id="merge",
                type=NodeType.TASK,  # Changed from MERGE to TASK
                callable=merge_results,
                required_inputs=["result_1", "result_2", "result_3", "result_4", "result_5"],
                output_keys=["merged_results"],
                timeout=30.0,
            ),
            Node(
                id="final_processing",
                type=NodeType.TASK,
                callable=final_processing,
                required_inputs=["merged_results"],
                output_keys=["final_result"],
                timeout=30.0,
            ),
        ]

        # Add nodes
        for node in nodes:
            workflow.add_node(node)

        # Add edges (all parallel tasks feed into merge)
        edges = [
            Edge(source="task_1", target="merge"),
            Edge(source="task_2", target="merge"),
            Edge(source="task_3", target="merge"),
            Edge(source="task_4", target="merge"),
            Edge(source="task_5", target="merge"),
            Edge(source="merge", target="final_processing"),
        ]

        for edge in edges:
            workflow.add_edge(edge)

        # Set entry and end points
        workflow.set_entry_points(["task_1", "task_2", "task_3", "task_4", "task_5"])
        workflow.set_end_points(["final_processing"])

        # Execute workflow
        result = await workflow.run("Execute parallel performance stress test")

        return self.end_benchmark("Performance Stress Test", {
            'workflow_type': 'performance_test',
            'nodes_count': len(nodes),
            'edges_count': len(edges),
            'result': result,
            'features_used': ['parallel_execution', 'task_nodes', 'networkx_engine', 'performance_optimization']
        })

    async def run_all_benchmarks(self):
        """Run all benchmarks and generate comprehensive report."""
        print("🎯 Starting GraphWorkflow Benchmark Suite")
        print("=" * 60)

        # Run all benchmarks
        await self.benchmark_software_development_pipeline()
        await self.benchmark_data_processing_pipeline()
        await self.benchmark_ai_ml_workflow()
        await self.benchmark_business_process_workflow()
        await self.benchmark_performance_stress_test()

        # Generate comprehensive report
        self.generate_benchmark_report()

        return self.results

    def generate_benchmark_report(self):
        """Generate a comprehensive benchmark report."""
        print("\n" + "=" * 60)
        print("📊 GRAPHWORKFLOW BENCHMARK REPORT")
        print("=" * 60)

        total_duration = sum(result.get('duration', 0) for result in self.results.values())
        total_nodes = sum(result.get('nodes_count', 0) for result in self.results.values())
        total_edges = sum(result.get('edges_count', 0) for result in self.results.values())

        print(f"Total Benchmarks: {len(self.results)}")
        print(f"Total Duration: {total_duration:.2f}s")
        print(f"Total Nodes: {total_nodes}")
        print(f"Total Edges: {total_edges}")
        print(f"Average Duration per Benchmark: {total_duration / len(self.results):.2f}s")

        print("\n📈 Individual Benchmark Results:")
        print("-" * 60)

        for name, result in self.results.items():
            print(f"{name:30} | {result.get('duration', 0):6.2f}s | "
                  f"{result.get('nodes_count', 0):3d} nodes | "
                  f"{result.get('edges_count', 0):3d} edges | "
                  f"{result.get('workflow_type', 'unknown')}")

        print("\n🏆 Performance Summary:")
        print("-" * 60)

        # Find fastest and slowest benchmarks
        fastest = min(self.results.items(), key=lambda x: x[1].get('duration', float('inf')))
        slowest = max(self.results.items(), key=lambda x: x[1].get('duration', 0))

        print(f"Fastest Benchmark: {fastest[0]} ({fastest[1].get('duration', 0):.2f}s)")
        print(f"Slowest Benchmark: {slowest[0]} ({slowest[1].get('duration', 0):.2f}s)")

        # Feature usage analysis
        all_features = set()
        for result in self.results.values():
            features = result.get('features_used', [])
            all_features.update(features)

        print(f"\n🔧 Features Tested: {', '.join(sorted(all_features))}")

        # Save detailed results to file
        report_data = {
            "summary": {
                "total_benchmarks": len(self.results),
                "total_duration": total_duration,
                "total_nodes": total_nodes,
                "total_edges": total_edges,
                "average_duration": total_duration / len(self.results)
            },
            "benchmarks": self.results,
            "features_tested": list(all_features),
            "timestamp": datetime.now().isoformat()
        }

        with open("graphworkflow_benchmark_report.json", "w") as f:
            json.dump(report_data, f, indent=2)

        print("\n📄 Detailed report saved to: graphworkflow_benchmark_report.json")


async def main():
    """Main function to run all benchmarks."""
    benchmarks = GraphWorkflowBenchmarks()
    results = await benchmarks.run_all_benchmarks()
    return results


if __name__ == "__main__":
    # Run benchmarks
    asyncio.run(main())
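
# To run a single benchmark instead of the full suite (same API as above):
#   asyncio.run(GraphWorkflowBenchmarks().benchmark_data_processing_pipeline())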