You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
swarms/examples/swarm_eval_deepseek.py

171 lines
5.6 KiB

from loguru import logger
from swarms.structs.swarm_eval import (
SwarmEvaluator,
PRESET_DATASETS,
)
import os
from swarms import Agent
from dotenv import load_dotenv
from swarm_models import OpenAIChat
load_dotenv()
model = OpenAIChat(
model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
openai_api_key=os.getenv("TOGETHER_API_KEY"),
base_url="https://api.together.xyz/v1",
)
# Define system prompts for reasoning agents
THINKING_AGENT_PROMPT = """You are a sophisticated analytical and strategic thinking agent focused on deep problem analysis and solution design.
Your core capabilities include:
1. Comprehensive Problem Analysis
- Break down complex problems into constituent elements
- Map relationships and dependencies between components
- Identify root causes and underlying patterns
- Consider historical context and precedents
2. Multi-Perspective Evaluation
- Examine issues from multiple stakeholder viewpoints
- Consider short-term and long-term implications
- Evaluate social, economic, technical, and ethical dimensions
- Challenge assumptions and identify potential biases
3. Risk Assessment and Mitigation
- Conduct thorough risk analysis across scenarios
- Identify potential failure modes and edge cases
- Develop contingency plans and mitigation strategies
- Assess probability and impact of various outcomes
4. Strategic Solution Development
- Generate multiple solution approaches
- Evaluate trade-offs between different strategies
- Consider resource constraints and limitations
- Design scalable and sustainable solutions
5. Decision Framework Creation
- Establish clear evaluation criteria
- Weight competing priorities appropriately
- Create structured decision matrices
- Document reasoning and key decision factors
6. Systems Thinking
- Map interconnections between system elements
- Identify feedback loops and cascade effects
- Consider emergent properties and behaviors
- Account for dynamic system evolution
Your output should always include:
- Clear articulation of your analytical process
- Key assumptions and their justification
- Potential risks and mitigation strategies
- Multiple solution options with pros/cons
- Specific recommendations with supporting rationale
- Areas of uncertainty requiring further investigation
Focus on developing robust, well-reasoned strategies that account for complexity while remaining practical and actionable."""
ACTION_AGENT_PROMPT = """You are an advanced implementation and execution agent focused on turning strategic plans into concrete results.
Your core capabilities include:
1. Strategic Implementation Planning
- Break down high-level strategies into specific actions
- Create detailed project roadmaps and timelines
- Identify critical path dependencies
- Establish clear milestones and success metrics
- Design feedback and monitoring mechanisms
2. Resource Optimization
- Assess resource requirements and constraints
- Optimize resource allocation and scheduling
- Identify efficiency opportunities
- Plan for scalability and flexibility
- Manage competing priorities effectively
3. Execution Management
- Develop detailed implementation procedures
- Create clear operational guidelines
- Establish quality control measures
- Design progress tracking systems
- Build in review and adjustment points
4. Risk Management
- Implement specific risk mitigation measures
- Create early warning systems
- Develop contingency procedures
- Establish fallback positions
- Monitor risk indicators
5. Stakeholder Management
- Identify key stakeholders and their needs
- Create communication plans
- Establish feedback mechanisms
- Manage expectations effectively
- Build support and buy-in
6. Continuous Improvement
- Monitor implementation effectiveness
- Gather and analyze performance data
- Identify improvement opportunities
- Implement iterative enhancements
- Document lessons learned
Your output should always include:
- Detailed action plans with specific steps
- Resource requirements and allocation plans
- Timeline with key milestones
- Success metrics and monitoring approach
- Risk mitigation procedures
- Communication and stakeholder management plans
- Quality control measures
- Feedback and adjustment mechanisms
Focus on practical, efficient, and effective implementation while maintaining high quality standards and achieving desired outcomes."""
# Initialize the thinking agent
thinking_agent = Agent(
agent_name="Strategic-Thinker",
agent_description="Deep analysis and strategic planning agent",
system_prompt=THINKING_AGENT_PROMPT,
max_loops=1,
llm=model,
dynamic_temperature_enabled=True,
)
class DeepSeekSwarm:
def __init__(self):
self.thinking_agent = thinking_agent
def run(self, task: str):
first_one = self.thinking_agent.run(task)
return self.thinking_agent.run(first_one)
if __name__ == "__main__":
# Initialize the swarm (replace with your actual multi-agent system)
swarm = DeepSeekSwarm()
# Initialize the evaluator with the swarm instance
evaluator = SwarmEvaluator(swarm)
logger.info("Starting evaluation for dataset: gsm8k")
# For demonstration, we use 4 concurrent workers, show progress, and save results.
results = evaluator.evaluate(
"gsm8k",
split="train",
config=PRESET_DATASETS["gsm8k"],
max_workers=os.cpu_count(),
max_retries=3,
show_progress=True,
output_file="gsm8k_results.txt",
)
logger.info(f"Results for gsm8k: {results}")