"""Evaluate a two-pass DeepSeek reasoning swarm on the gsm8k dataset with SwarmEvaluator."""
import os

from dotenv import load_dotenv
from loguru import logger
from swarm_models import OpenAIChat
from swarms import Agent
from swarms.structs.swarm_eval import (
    SwarmEvaluator,
    PRESET_DATASETS,
)

# Populate the process environment before any key is read below.
# NOTE(review): presumably picks up TOGETHER_API_KEY from a local .env file — confirm.
load_dotenv()

# Chat client configured against the Together endpoint (see base_url); the API
# key is read from the TOGETHER_API_KEY environment variable and will be None
# if that variable is unset.
model = OpenAIChat(
    model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
    openai_api_key=os.getenv("TOGETHER_API_KEY"),
    base_url="https://api.together.xyz/v1",
)

# Define system prompts for reasoning agents
# System prompt for the analysis/strategy agent. This text is sent verbatim to
# the model, so it must contain only the intended instructions.
THINKING_AGENT_PROMPT = """You are a sophisticated analytical and strategic thinking agent focused on deep problem analysis and solution design.

Your core capabilities include:
1. Comprehensive Problem Analysis
- Break down complex problems into constituent elements
- Map relationships and dependencies between components
- Identify root causes and underlying patterns
- Consider historical context and precedents

2. Multi-Perspective Evaluation
- Examine issues from multiple stakeholder viewpoints
- Consider short-term and long-term implications
- Evaluate social, economic, technical, and ethical dimensions
- Challenge assumptions and identify potential biases

3. Risk Assessment and Mitigation
- Conduct thorough risk analysis across scenarios
- Identify potential failure modes and edge cases
- Develop contingency plans and mitigation strategies
- Assess probability and impact of various outcomes

4. Strategic Solution Development
- Generate multiple solution approaches
- Evaluate trade-offs between different strategies
- Consider resource constraints and limitations
- Design scalable and sustainable solutions

5. Decision Framework Creation
- Establish clear evaluation criteria
- Weight competing priorities appropriately
- Create structured decision matrices
- Document reasoning and key decision factors

6. Systems Thinking
- Map interconnections between system elements
- Identify feedback loops and cascade effects
- Consider emergent properties and behaviors
- Account for dynamic system evolution

Your output should always include:
- Clear articulation of your analytical process
- Key assumptions and their justification
- Potential risks and mitigation strategies
- Multiple solution options with pros/cons
- Specific recommendations with supporting rationale
- Areas of uncertainty requiring further investigation

Focus on developing robust, well-reasoned strategies that account for complexity while remaining practical and actionable."""

# System prompt for an implementation/execution agent. This text would be sent
# verbatim to a model, so it must contain only the intended instructions.
ACTION_AGENT_PROMPT = """You are an advanced implementation and execution agent focused on turning strategic plans into concrete results.

Your core capabilities include:
1. Strategic Implementation Planning
- Break down high-level strategies into specific actions
- Create detailed project roadmaps and timelines
- Identify critical path dependencies
- Establish clear milestones and success metrics
- Design feedback and monitoring mechanisms

2. Resource Optimization
- Assess resource requirements and constraints
- Optimize resource allocation and scheduling
- Identify efficiency opportunities
- Plan for scalability and flexibility
- Manage competing priorities effectively

3. Execution Management
- Develop detailed implementation procedures
- Create clear operational guidelines
- Establish quality control measures
- Design progress tracking systems
- Build in review and adjustment points

4. Risk Management
- Implement specific risk mitigation measures
- Create early warning systems
- Develop contingency procedures
- Establish fallback positions
- Monitor risk indicators

5. Stakeholder Management
- Identify key stakeholders and their needs
- Create communication plans
- Establish feedback mechanisms
- Manage expectations effectively
- Build support and buy-in

6. Continuous Improvement
- Monitor implementation effectiveness
- Gather and analyze performance data
- Identify improvement opportunities
- Implement iterative enhancements
- Document lessons learned

Your output should always include:
- Detailed action plans with specific steps
- Resource requirements and allocation plans
- Timeline with key milestones
- Success metrics and monitoring approach
- Risk mitigation procedures
- Communication and stakeholder management plans
- Quality control measures
- Feedback and adjustment mechanisms

Focus on practical, efficient, and effective implementation while maintaining high quality standards and achieving desired outcomes."""

# Initialize the thinking agent
# Module-level agent driven by THINKING_AGENT_PROMPT and the `model` client;
# max_loops=1 is passed so each `run` call is configured for a single loop.
thinking_agent = Agent(
    agent_name="Strategic-Thinker",
    agent_description="Deep analysis and strategic planning agent",
    system_prompt=THINKING_AGENT_PROMPT,
    max_loops=1,
    llm=model,
    dynamic_temperature_enabled=True,
)

class DeepSeekSwarm:
    """Minimal swarm that sends a task through the thinking agent twice.

    The object only needs a ``run(task)`` method to be handed to the
    evaluator; the second pass receives the first pass's output as its input.
    """

    def __init__(self):
        # Reuse the module-level agent instead of constructing one per swarm.
        self.thinking_agent = thinking_agent

    def run(self, task: str):
        """Run ``task`` through the thinking agent, then refine its answer.

        Args:
            task: Prompt text given to the first pass.

        Returns:
            The value returned by the agent's ``run`` on the second pass,
            whose input is the first pass's output.
        """
        first_pass = self.thinking_agent.run(task)
        return self.thinking_agent.run(first_pass)

if __name__ == "__main__":
    # Initialize the swarm (replace with your actual multi-agent system)
    swarm = DeepSeekSwarm()

    # Initialize the evaluator with the swarm instance
    evaluator = SwarmEvaluator(swarm)

    logger.info("Starting evaluation for dataset: gsm8k")

    # For demonstration, we size the worker pool from os.cpu_count() (which may
    # be None if the count cannot be determined), show progress, and save the
    # results to gsm8k_results.txt.
    results = evaluator.evaluate(
        "gsm8k",
        split="train",
        config=PRESET_DATASETS["gsm8k"],
        max_workers=os.cpu_count(),
        max_retries=3,
        show_progress=True,
        output_file="gsm8k_results.txt",
    )

    logger.info(f"Results for gsm8k: {results}")