diff --git a/auto_agent.py b/auto_agent.py new file mode 100644 index 00000000..92f5ffa9 --- /dev/null +++ b/auto_agent.py @@ -0,0 +1,15 @@ +from swarms import Agent + +# Initialize the agent +agent = Agent( + agent_name="Quantitative-Trading-Agent", + agent_description="Advanced quantitative trading and algorithmic analysis agent", + model_name="gpt-4.1", + max_loops="auto", +) + +out = agent.run( + task="What are the top five best energy stocks across nuclear, solar, gas, and other energy sources?", +) + +print(out) diff --git a/docs/swarms/concept/swarm_architectures.md b/docs/swarms/concept/swarm_architectures.md index 90357636..226a6b45 100644 --- a/docs/swarms/concept/swarm_architectures.md +++ b/docs/swarms/concept/swarm_architectures.md @@ -43,7 +43,7 @@ Multi-agent architectures leverage these communication patterns to ensure that a | Hybrid Hierarchical Cluster | Combines hierarchical and peer-to-peer communication patterns for complex workflows. | [Learn More](https://docs.swarms.world/en/latest/swarms/structs/hhcs/) | Complex enterprise workflows, multi-department coordination | | Batched Grid Workflow | Executes tasks in a batched grid format, where each agent processes a different task simultaneously in parallel. | [Learn More](https://docs.swarms.world/en/latest/swarms/structs/batched_grid_workflow/) | Parallel task processing, batch operations, grid-based task distribution | | LLM Council | Orchestrates multiple specialized LLM agents to collaboratively answer queries through structured peer review and synthesis. | [Learn More](https://docs.swarms.world/en/latest/swarms/structs/llm_council/) | Multi-model evaluation, peer review systems, collaborative AI decision-making | -| Debate with Judge | A debate architecture where two agents (Pro and Con) debate a topic, with a Judge agent evaluating arguments and providing refined synthesis over multiple rounds. | [Learn More](https://docs.swarms.world/en/latest/swarms/structs/debate_with_judge/) | Argument analysis, decision refinement, structured debates, iterative improvement | +| Debate with Judge | A debate architecture with Pro and Con agents debating topics, evaluated by a Judge. Supports preset agents, agent lists, or individual configuration for flexible setup. | [Learn More](https://docs.swarms.world/en/latest/swarms/structs/debate_with_judge/) | Argument analysis, decision refinement, structured debates, iterative improvement | | Self MoA Seq | Sequential self-mixture of agents that generates multiple candidate responses and synthesizes them sequentially using a sliding window approach. | [Learn More](https://docs.swarms.world/en/latest/swarms/structs/self_moa_seq/) | High-quality response generation, ensemble methods, sequential synthesis | | Swarm Rearrange | Orchestrates multiple swarms in sequential or parallel flow patterns, providing thread-safe operations for managing swarm execution. | [Learn More](https://docs.swarms.world/en/latest/swarms/structs/swarm_rearrange/) | Multi-swarm coordination, complex workflow orchestration, swarm composition | @@ -688,7 +688,7 @@ graph TD ### Debate with Judge **Overview:** -Debate architecture with self-refinement through a judge agent, enabling Pro and Con agents to debate a topic with iterative refinement. The judge evaluates arguments and provides synthesis for progressive improvement. +Debate architecture with self-refinement through a judge agent, enabling Pro and Con agents to debate a topic with iterative refinement. 
The judge evaluates arguments and provides synthesis for progressive improvement. Supports preset agents for quick setup, agent lists, or individual agent configuration. **Use Cases:** @@ -701,6 +701,12 @@ Debate architecture with self-refinement through a judge agent, enabling Pro and - Multi-perspective analysis +**Initialization Options:** + +- `preset_agents=True`: Use built-in optimized agents (simplest) +- `agents=[pro, con, judge]`: Provide a list of 3 agents +- Individual parameters: `pro_agent`, `con_agent`, `judge_agent` + **[Learn More](https://docs.swarms.world/en/latest/swarms/structs/debate_with_judge/)** ```mermaid @@ -717,7 +723,7 @@ graph TD G --> H H --> I[Judge Synthesis] - I --> J{More Rounds?} + I --> J{More Loops?} J -->|Yes| C J -->|No| K[Final Output] ``` diff --git a/docs/swarms/structs/debate_with_judge.md b/docs/swarms/structs/debate_with_judge.md index 89341f77..716ad90c 100644 --- a/docs/swarms/structs/debate_with_judge.md +++ b/docs/swarms/structs/debate_with_judge.md @@ -29,6 +29,7 @@ graph TD | Judge Agent | An impartial evaluator that analyzes both arguments and provides synthesis | | Iterative Refinement | The process repeats for multiple rounds, each round building upon the judge's previous synthesis | | Progressive Improvement | Each round refines the answer by incorporating feedback and addressing weaknesses | +| Preset Agents | Built-in optimized agents that can be used without manual configuration | ## Class Definition: `DebateWithJudge` @@ -36,12 +37,15 @@ graph TD class DebateWithJudge: def __init__( self, - pro_agent: Agent, - con_agent: Agent, - judge_agent: Agent, - max_rounds: int = 3, + pro_agent: Optional[Agent] = None, + con_agent: Optional[Agent] = None, + judge_agent: Optional[Agent] = None, + agents: Optional[List[Agent]] = None, + preset_agents: bool = False, + max_loops: int = 3, output_type: str = "str-all-except-first", verbose: bool = True, + model_name: str = "gpt-4o-mini", ): ``` @@ -49,12 +53,73 @@ class DebateWithJudge: | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `pro_agent` | `Agent` | Required | The agent arguing in favor (Pro position) | -| `con_agent` | `Agent` | Required | The agent arguing against (Con position) | -| `judge_agent` | `Agent` | Required | The judge agent that evaluates arguments and provides synthesis | -| `max_rounds` | `int` | `3` | Maximum number of debate rounds to execute | +| `pro_agent` | `Optional[Agent]` | `None` | The agent arguing in favor (Pro position). Not required if using `agents` list or `preset_agents`. | +| `con_agent` | `Optional[Agent]` | `None` | The agent arguing against (Con position). Not required if using `agents` list or `preset_agents`. | +| `judge_agent` | `Optional[Agent]` | `None` | The judge agent that evaluates arguments and provides synthesis. Not required if using `agents` list or `preset_agents`. | +| `agents` | `Optional[List[Agent]]` | `None` | A list of exactly 3 agents in order: `[pro_agent, con_agent, judge_agent]`. Takes precedence over individual agent parameters. | +| `preset_agents` | `bool` | `False` | If `True`, creates default Pro, Con, and Judge agents automatically with optimized system prompts. 
| +| `max_loops` | `int` | `3` | Maximum number of debate rounds to execute | | `output_type` | `str` | `"str-all-except-first"` | Format for the output conversation history | | `verbose` | `bool` | `True` | Whether to enable verbose logging | +| `model_name` | `str` | `"gpt-4o-mini"` | The model name to use for preset agents | + +### Initialization Options + +The `DebateWithJudge` class supports three ways to configure agents: + +#### Option 1: Preset Agents (Simplest) + +Use built-in agents with optimized system prompts for debates: + +```python +from swarms import DebateWithJudge + +# Create debate system with preset agents +debate = DebateWithJudge( + preset_agents=True, + max_loops=3, + model_name="gpt-4o-mini" # Optional: specify model +) + +result = debate.run("Should AI be regulated?") +``` + +#### Option 2: List of Agents + +Provide a list of exactly 3 agents (Pro, Con, Judge): + +```python +from swarms import Agent, DebateWithJudge + +# Create your custom agents +agents = [pro_agent, con_agent, judge_agent] + +# Create debate system with agent list +debate = DebateWithJudge( + agents=agents, + max_loops=3 +) + +result = debate.run("Is remote work better than office work?") +``` + +#### Option 3: Individual Agent Parameters + +Provide each agent separately (original behavior): + +```python +from swarms import Agent, DebateWithJudge + +# Create debate system with individual agents +debate = DebateWithJudge( + pro_agent=my_pro_agent, + con_agent=my_con_agent, + judge_agent=my_judge_agent, + max_loops=3 +) + +result = debate.run("Should we colonize Mars?") +``` ## API Reference @@ -94,7 +159,71 @@ def run(self, task: str) -> Union[str, List, dict] - **Topic Refinement**: Judge's synthesis becomes the topic for the next round 4. **Result Formatting**: Returns the final result formatted according to `output_type` -**Example:** +**Example 1: Using Preset Agents (Simplest):** + +```python +from swarms import DebateWithJudge + +# Create the DebateWithJudge system with preset agents +debate_system = DebateWithJudge( + preset_agents=True, + max_loops=3, + output_type="str-all-except-first", + verbose=True, +) + +# Define the debate topic +topic = ( + "Should artificial intelligence be regulated by governments? " + "Discuss the balance between innovation and safety." 
+) + +# Run the debate +result = debate_system.run(task=topic) +print(result) +``` + +**Example 2: Using Agent List:** + +```python +from swarms import Agent, DebateWithJudge + +# Create custom agents +pro_agent = Agent( + agent_name="Pro-Agent", + system_prompt="You are a skilled debater who argues in favor of positions...", + model_name="gpt-4o-mini", + max_loops=1, +) + +con_agent = Agent( + agent_name="Con-Agent", + system_prompt="You are a skilled debater who argues against positions...", + model_name="gpt-4o-mini", + max_loops=1, +) + +judge_agent = Agent( + agent_name="Judge-Agent", + system_prompt="You are an impartial judge who evaluates debates...", + model_name="gpt-4o-mini", + max_loops=1, +) + +# Create the DebateWithJudge system using agent list +debate_system = DebateWithJudge( + agents=[pro_agent, con_agent, judge_agent], + max_loops=3, + output_type="str-all-except-first", + verbose=True, +) + +# Run the debate +result = debate_system.run(task="Should AI be regulated?") +print(result) +``` + +**Example 3: Using Individual Agent Parameters:** ```python from swarms import Agent, DebateWithJudge @@ -143,7 +272,7 @@ debate_system = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=3, + max_loops=3, output_type="str-all-except-first", verbose=True, ) @@ -282,9 +411,10 @@ print(final_answer) | `pro_agent` | `Agent` | The agent arguing in favor (Pro position) | | `con_agent` | `Agent` | The agent arguing against (Con position) | | `judge_agent` | `Agent` | The judge agent that evaluates arguments | -| `max_rounds` | `int` | Maximum number of debate rounds | +| `max_loops` | `int` | Maximum number of debate rounds | | `output_type` | `str` | Format for returned results | | `verbose` | `bool` | Whether verbose logging is enabled | +| `model_name` | `str` | Model name used for preset agents | | `conversation` | `Conversation` | Conversation history management object | ## Output Types @@ -301,6 +431,21 @@ The `output_type` parameter controls how the conversation history is formatted: ## Usage Patterns +### Quick Start with Preset Agents + +The fastest way to get started - no agent configuration needed: + +```python +from swarms import DebateWithJudge + +# Create debate system with built-in optimized agents +debate = DebateWithJudge(preset_agents=True, max_loops=3) + +# Run a debate +result = debate.run("Should universal basic income be implemented?") +print(result) +``` + ### Single Topic Debate For focused debate and refinement on a single complex topic: @@ -314,6 +459,26 @@ debate_system.output_type = "dict" result = debate_system.run("Should universal basic income be implemented?") ``` +### Using Agent List + +Pass a list of 3 agents for flexible configuration: + +```python +from swarms import Agent, DebateWithJudge + +# Create or obtain agents from various sources +my_agents = [pro_agent, con_agent, judge_agent] + +# Create debate with agent list +debate = DebateWithJudge( + agents=my_agents, + max_loops=3, + verbose=True +) + +result = debate.run("Is nuclear energy the solution to climate change?") +``` + ### Batch Processing For processing multiple related topics sequentially: @@ -359,14 +524,45 @@ technical_debate = DebateWithJudge( pro_agent=technical_pro, con_agent=technical_con, judge_agent=technical_judge, - max_rounds=5, # More rounds for complex technical topics + max_loops=5, # More rounds for complex technical topics verbose=True, ) ``` ## Usage Examples -### Example 1: Policy Debate on AI Regulation +### Example 1: Quick 
Start with Preset Agents + +The simplest way to use `DebateWithJudge` - no manual agent configuration needed: + +```python +from swarms import DebateWithJudge + +# Create the DebateWithJudge system with preset agents +debate_system = DebateWithJudge( + preset_agents=True, + max_loops=3, + model_name="gpt-4o-mini", # Specify model for preset agents + output_type="str-all-except-first", + verbose=True, +) + +# Define the debate topic +topic = ( + "Should artificial intelligence be regulated by governments? " + "Discuss the balance between innovation and safety." +) + +# Run the debate +result = debate_system.run(task=topic) +print(result) + +# Get the final refined answer +final_answer = debate_system.get_final_answer() +print(final_answer) +``` + +### Example 2: Policy Debate with Custom Agents This example demonstrates using `DebateWithJudge` for a comprehensive policy debate on AI regulation, with multiple rounds of refinement. @@ -425,7 +621,7 @@ debate_system = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=3, + max_loops=3, output_type="str-all-except-first", verbose=True, ) @@ -448,7 +644,47 @@ final_answer = debate_system.get_final_answer() print(final_answer) ``` -### Example 2: Technical Architecture Debate with Batch Processing +### Example 3: Using Agent List + +This example demonstrates using the `agents` list parameter to provide agents: + +```python +from swarms import Agent, DebateWithJudge + +# Create your agents +pro = Agent( + agent_name="Microservices-Pro", + system_prompt="You advocate for microservices architecture...", + model_name="gpt-4o-mini", + max_loops=1, +) + +con = Agent( + agent_name="Monolith-Pro", + system_prompt="You advocate for monolithic architecture...", + model_name="gpt-4o-mini", + max_loops=1, +) + +judge = Agent( + agent_name="Architecture-Judge", + system_prompt="You evaluate architecture debates...", + model_name="gpt-4o-mini", + max_loops=1, +) + +# Create debate with agent list +debate = DebateWithJudge( + agents=[pro, con, judge], # Pass as list + max_loops=2, + verbose=True, +) + +result = debate.run("Should a startup use microservices or monolithic architecture?") +print(result) +``` + +### Example 4: Technical Architecture Debate with Batch Processing This example demonstrates using `batched_run` to process multiple technical architecture questions, comparing different approaches to system design. @@ -497,7 +733,7 @@ architecture_debate = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=2, # Fewer rounds for more focused technical debates + max_loops=2, # Fewer rounds for more focused technical debates output_type="str-all-except-first", verbose=True, ) @@ -518,7 +754,7 @@ for result in results: print(result) ``` -### Example 3: Business Strategy Debate with Custom Configuration +### Example 5: Business Strategy Debate with Custom Configuration This example demonstrates a business strategy debate with custom agent configurations, multiple rounds, and accessing conversation history. @@ -575,7 +811,7 @@ strategy_debate = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=4, # More rounds for complex strategic discussions + max_loops=4, # More rounds for complex strategic discussions output_type="dict", # Use dict format for structured analysis verbose=True, ) @@ -609,18 +845,27 @@ print(final_answer) ### Agent Configuration !!! 
tip "Agent Configuration Best Practices" + - **Preset Agents**: Use `preset_agents=True` for quick setup with optimized prompts + - **Custom Agents**: For specialized domains, create custom agents with domain-specific prompts - **Pro Agent**: Should be configured with expertise in the topic area and strong argumentation skills - **Con Agent**: Should be configured to identify weaknesses and provide compelling alternatives - **Judge Agent**: Should be configured with broad expertise and impartial evaluation capabilities - Use appropriate models for the complexity of the debate topic - Consider using more powerful models for the Judge agent -### Round Configuration +### Initialization Strategy + +!!! info "Choosing an Initialization Method" + - **`preset_agents=True`**: Best for quick prototyping and general-purpose debates + - **`agents=[...]` list**: Best when you have agents from external sources or dynamic creation + - **Individual parameters**: Best for maximum control and explicit configuration + +### Loop Configuration -!!! note "Round Configuration Tips" - - Use 2-3 rounds for most topics - - Use 4-5 rounds for complex, multi-faceted topics - - More rounds allow for deeper refinement but increase execution time +!!! note "Loop Configuration Tips" + - Use 2-3 loops (`max_loops`) for most topics + - Use 4-5 loops for complex, multi-faceted topics + - More loops allow for deeper refinement but increase execution time - Consider the trade-off between refinement quality and cost ### Output Format Selection @@ -646,25 +891,31 @@ print(final_answer) !!! danger "Common Problems" **Issue**: Agents not following their roles - **Solution**: Ensure system prompts clearly define each agent's role and expertise + **Solution**: Ensure system prompts clearly define each agent's role and expertise. Consider using `preset_agents=True` for well-tested prompts. --- - **Issue**: Judge synthesis not improving over rounds + **Issue**: Judge synthesis not improving over loops - **Solution**: Increase `max_rounds` or improve Judge agent's system prompt to emphasize refinement + **Solution**: Increase `max_loops` or improve Judge agent's system prompt to emphasize refinement --- **Issue**: Debate results are too generic - **Solution**: Use more specific system prompts and provide detailed context in the task + **Solution**: Use more specific system prompts and provide detailed context in the task. Custom agents often produce better domain-specific results. 
--- **Issue**: Execution time is too long - **Solution**: Reduce `max_rounds`, use faster models, or process fewer topics in batch + **Solution**: Reduce `max_loops`, use faster models, or process fewer topics in batch + + --- + + **Issue**: ValueError when initializing + + **Solution**: Ensure you provide one of: (1) all three agents, (2) an agents list with exactly 3 agents, or (3) `preset_agents=True` ## Contributing diff --git a/example.py b/example.py index 203d4b4b..829d90c9 100644 --- a/example.py +++ b/example.py @@ -6,12 +6,15 @@ agent = Agent( agent_description="Advanced quantitative trading and algorithmic analysis agent", model_name="gpt-4.1", dynamic_temperature_enabled=True, - max_loops=1, + max_loops=5, dynamic_context_window=True, - streaming_on=False, top_p=None, + streaming_on=True, + interactive=True, ) -out = agent.run(task="What are the top five best energy stocks across nuclear, solar, gas, and other energy sources?",) +out = agent.run( + task="What are the top five best energy stocks across nuclear, solar, gas, and other energy sources?", +) print(out) diff --git a/examples/multi_agent/debate_examples/README.md b/examples/multi_agent/debate_examples/README.md index 83241c27..058518ab 100644 --- a/examples/multi_agent/debate_examples/README.md +++ b/examples/multi_agent/debate_examples/README.md @@ -15,7 +15,7 @@ The `DebateWithJudge` architecture implements a debate system with self-refineme - **Agent A (Pro)** and **Agent B (Con)** present opposing arguments - Both arguments are evaluated by a **Judge/Critic Agent** - The Judge provides a winner or synthesis → refined answer -- The process repeats for N rounds to progressively improve the answer +- The process repeats for N loops to progressively improve the answer **Architecture Flow:** ``` @@ -28,10 +28,48 @@ Agent A (Pro) ↔ Agent B (Con) Winner or synthesis → refined answer ``` -**Example Usage:** +**Initialization Options:** + +The `DebateWithJudge` class supports three ways to configure agents: + +1. **Preset Agents** (simplest): Use built-in optimized agents +2. **Agent List**: Provide a list of 3 agents `[pro, con, judge]` +3. **Individual Parameters**: Provide each agent separately + +**Quick Start with Preset Agents:** +```python +from swarms import DebateWithJudge + +# Create debate system with built-in agents (simplest approach) +debate = DebateWithJudge( + preset_agents=True, + max_loops=3, + model_name="gpt-4o-mini" +) + +# Run debate +result = debate.run("Should AI be regulated?") +``` + +**Using Agent List:** ```python -from swarms import Agent -from swarms.structs.debate_with_judge import DebateWithJudge +from swarms import Agent, DebateWithJudge + +# Create your agents +agents = [pro_agent, con_agent, judge_agent] + +# Create debate system with agent list +debate = DebateWithJudge( + agents=agents, + max_loops=3 +) + +result = debate.run("Should AI be regulated?") +``` + +**Using Individual Agent Parameters:** +```python +from swarms import Agent, DebateWithJudge # Create Pro, Con, and Judge agents pro_agent = Agent(agent_name="Pro-Agent", ...) @@ -43,12 +81,19 @@ debate = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=3 + max_loops=3 ) # Run debate result = debate.run("Should AI be regulated?") ``` -See [debate_with_judge_example.py](./debate_with_judge_example.py) for a complete example. 
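+
+**Retrieving the Final Refined Answer:**
+
+After `run()` completes, the judge's final synthesis can also be retrieved with `get_final_answer()`, as shown in the main documentation. A minimal sketch, assuming the preset agents described above:
+
+```python
+from swarms import DebateWithJudge
+
+# Build the debate system from the built-in preset agents
+debate = DebateWithJudge(preset_agents=True, max_loops=3)
+
+# Run the debate; returns the conversation history formatted per `output_type`
+result = debate.run("Should AI be regulated?")
+
+# Retrieve only the judge's final refined synthesis
+final_answer = debate.get_final_answer()
+print(final_answer)
+```
+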
+## Example Files + +| File | Description | +|------|-------------| +| [debate_with_judge_example.py](./debate_with_judge_example.py) | Complete example showing all initialization methods | +| [policy_debate_example.py](./policy_debate_example.py) | Policy debate on AI regulation | +| [technical_architecture_debate_example.py](./technical_architecture_debate_example.py) | Technical architecture debate with batch processing | +| [business_strategy_debate_example.py](./business_strategy_debate_example.py) | Business strategy debate with conversation history | diff --git a/examples/multi_agent/debate_examples/business_strategy_debate_example.py b/examples/multi_agent/debate_examples/business_strategy_debate_example.py index 7dd44c11..61478eef 100644 --- a/examples/multi_agent/debate_examples/business_strategy_debate_example.py +++ b/examples/multi_agent/debate_examples/business_strategy_debate_example.py @@ -52,12 +52,12 @@ judge_agent = Agent( max_loops=1, ) -# Create the debate system with extended rounds for complex strategy discussions +# Create the debate system with extended loops for complex strategy discussions strategy_debate = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=4, # More rounds for complex strategic discussions + max_loops=4, # More loops for complex strategic discussions output_type="dict", # Use dict format for structured analysis verbose=True, ) diff --git a/examples/multi_agent/debate_examples/debate_with_judge_example.py b/examples/multi_agent/debate_examples/debate_with_judge_example.py index 663f88e9..e48cabd5 100644 --- a/examples/multi_agent/debate_examples/debate_with_judge_example.py +++ b/examples/multi_agent/debate_examples/debate_with_judge_example.py @@ -1,61 +1,17 @@ -from swarms import Agent, DebateWithJudge -# Create the Pro agent (arguing in favor) -pro_agent = Agent( - agent_name="Pro-Agent", - system_prompt=( - "You are a skilled debater who argues in favor of positions. " - "You present well-reasoned arguments with evidence, examples, " - "and logical reasoning. You are persuasive and articulate." - ), - model_name="gpt-4o-mini", - max_loops=1, -) - -# Create the Con agent (arguing against) -con_agent = Agent( - agent_name="Con-Agent", - system_prompt=( - "You are a skilled debater who argues against positions. " - "You present strong counter-arguments with evidence, examples, " - "and logical reasoning. You identify weaknesses in opposing " - "arguments and provide compelling alternatives." - ), - model_name="gpt-4o-mini", - max_loops=1, -) +from swarms import DebateWithJudge -# Create the Judge agent (evaluates and synthesizes) -judge_agent = Agent( - agent_name="Judge-Agent", - system_prompt=( - "You are an impartial judge who evaluates debates. " - "You carefully analyze arguments from both sides, identify " - "strengths and weaknesses, and provide balanced synthesis. " - "You may declare a winner or provide a refined answer that " - "incorporates the best elements from both arguments." 
- ), - model_name="gpt-4o-mini", - max_loops=1, -) - -# Create the DebateWithJudge system debate_system = DebateWithJudge( - pro_agent=pro_agent, - con_agent=con_agent, - judge_agent=judge_agent, - max_rounds=3, # Run 3 rounds of debate and refinement - output_type="str-all-except-first", # Return as formatted string - verbose=True, # Enable verbose logging + preset_agents=True, + max_loops=3, + model_name="gpt-4o-mini", + output_type="str-all-except-first", + verbose=True, ) -# Define the debate topic topic = ( "Should artificial intelligence be regulated by governments? " "Discuss the balance between innovation and safety." ) -# Run the debate result = debate_system.run(task=topic) - -print(result) diff --git a/examples/multi_agent/debate_examples/policy_debate_example.py b/examples/multi_agent/debate_examples/policy_debate_example.py index a2e7c5ce..7e2d6e67 100644 --- a/examples/multi_agent/debate_examples/policy_debate_example.py +++ b/examples/multi_agent/debate_examples/policy_debate_example.py @@ -59,7 +59,7 @@ debate_system = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=3, + max_loops=3, output_type="str-all-except-first", verbose=True, ) diff --git a/examples/multi_agent/debate_examples/technical_architecture_debate_example.py b/examples/multi_agent/debate_examples/technical_architecture_debate_example.py index 24ecf3d1..964b62f0 100644 --- a/examples/multi_agent/debate_examples/technical_architecture_debate_example.py +++ b/examples/multi_agent/debate_examples/technical_architecture_debate_example.py @@ -49,7 +49,7 @@ architecture_debate = DebateWithJudge( pro_agent=pro_agent, con_agent=con_agent, judge_agent=judge_agent, - max_rounds=2, # Fewer rounds for more focused technical debates + max_loops=2, # Fewer loops for more focused technical debates output_type="str-all-except-first", verbose=True, ) diff --git a/swarms/prompts/__init__.py b/swarms/prompts/__init__.py index e73a118f..6bd4edfb 100644 --- a/swarms/prompts/__init__.py +++ b/swarms/prompts/__init__.py @@ -8,6 +8,11 @@ from swarms.prompts.operations_agent_prompt import ( ) from swarms.prompts.product_agent_prompt import PRODUCT_AGENT_PROMPT from swarms.prompts.prompt import Prompt +from swarms.prompts.autonomous_agent_prompt import ( + AUTONOMOUS_AGENT_SYSTEM_PROMPT, + get_autonomous_agent_prompt, + get_autonomous_agent_prompt_with_context, +) __all__ = [ "CODE_INTERPRETER", @@ -18,4 +23,7 @@ __all__ = [ "PRODUCT_AGENT_PROMPT", "DOCUMENTATION_WRITER_SOP", "Prompt", + "AUTONOMOUS_AGENT_SYSTEM_PROMPT", + "get_autonomous_agent_prompt", + "get_autonomous_agent_prompt_with_context", ] diff --git a/swarms/prompts/autonomous_agent_prompt.py b/swarms/prompts/autonomous_agent_prompt.py new file mode 100644 index 00000000..54bddedb --- /dev/null +++ b/swarms/prompts/autonomous_agent_prompt.py @@ -0,0 +1,357 @@ +""" +Comprehensive prompt for autonomous agent operating in auto loop mode. + +This prompt guides the agent through the structured workflow: +plan -> think -> action -> subtask_done -> complete_task +""" + +AUTONOMOUS_AGENT_SYSTEM_PROMPT = """You are an elite autonomous agent operating in a sophisticated autonomous loop structure. Your mission is to reliably and efficiently complete complex tasks by breaking them down into manageable subtasks, executing them systematically, and providing comprehensive results. + +## CORE PRINCIPLES + +1. **Excellence First**: The quality of your outputs directly impacts user success. Strive for perfection. +2. 
**Systematic Approach**: Break down complex tasks into clear, actionable steps with proper dependencies. +3. **Action-Oriented**: Focus on execution and completion, not endless analysis or communication. +4. **Adaptive Problem-Solving**: When obstacles arise, analyze, adapt, and continue forward. +5. **Transparency**: Keep users informed of progress, but prioritize execution over communication. + +## AUTONOMOUS LOOP WORKFLOW + +You operate in a structured three-phase cycle: + +### PHASE 1: PLANNING +**Objective**: Create a comprehensive, actionable plan for the task. + +**Process**: +1. Analyze the main task thoroughly +2. Break it down into smaller, manageable subtasks +3. Assign appropriate priorities (critical, high, medium, low) +4. Identify dependencies between subtasks +5. Use the `create_plan` tool to formalize your plan + +**Guidelines**: +- Each subtask should be specific and actionable +- Critical priority tasks are foundational and must be completed first +- Dependencies ensure logical execution order +- The plan should be comprehensive but not overly granular + +**Example Plan Structure**: +``` +Task: Research and write a report on renewable energy +├── research_sources (critical) - Identify authoritative sources +├── gather_data (high, depends on: research_sources) - Collect relevant data +├── analyze_trends (high, depends on: gather_data) - Analyze patterns +├── draft_report (critical, depends on: analyze_trends) - Write initial draft +└── finalize_report (medium, depends on: draft_report) - Polish and format +``` + +### PHASE 2: EXECUTION +**Objective**: Complete each subtask systematically and efficiently. + +**Workflow for Each Subtask**: +1. **Brief Analysis** (Optional but Recommended): + - Use the `think` tool ONCE to analyze what needs to be done + - Assess complexity, required tools, and approach + - Set clear expectations for the subtask outcome + +2. **Take Action**: + - Use available tools to complete the work + - Execute concrete actions, not just analysis + - Make progress toward the subtask goal + +3. **Communicate Progress** (Optional, Limit to Once): + - Use `respond_to_user` ONCE if significant progress is made or clarification is needed + - Do NOT repeatedly communicate - focus on execution + - Communication should be informative, not repetitive + +4. **Complete Subtask**: + - When the subtask is finished, call `subtask_done` with: + - task_id: The ID of the completed subtask + - summary: A clear summary of what was accomplished + - success: true if completed successfully, false otherwise + +**Critical Rules**: +- DO NOT call `think` more than 2 times consecutively - take action instead +- DO NOT call `respond_to_user` more than 2 times consecutively - execute instead +- DO NOT get stuck in analysis or communication loops +- DO focus on completing the actual work +- DO mark subtasks as done when finished, not when you're "about to start" + +**Tool Usage Priority**: +1. Use available user-provided tools for actual work +2. Use `think` briefly for complex situations (max 2 times) +3. Use `respond_to_user` sparingly for important updates (max 2 times) +4. Always end with `subtask_done` when work is complete + +### PHASE 3: THINKING (Between Tasks) +**Objective**: Reflect on progress and determine next steps. + +**When to Enter Thinking Phase**: +- After completing a subtask +- When assessing overall progress +- Before finalizing the main task + +**Process**: +1. Assess current state: + - How many subtasks are completed? + - What progress has been made? 
+ - What remains to be done? + +2. Determine next action: + - If all subtasks are complete: Call `complete_task` + - If subtasks remain: Return to execution phase for the next task + - If stuck: Analyze the issue and take corrective action + +3. Keep it brief: + - Thinking phase should be quick assessment, not deep analysis + - Move to action quickly + +## TOOL USAGE GUIDELINES + +### create_plan +**When to Use**: At the very beginning, when you receive the main task. +**How to Use**: +- Provide a clear task_description +- Break down into steps with step_id, description, priority, and dependencies +- Ensure the plan is comprehensive and actionable + +### think +**When to Use**: +- Before starting a complex subtask (optional but recommended) +- When you need to analyze a situation +- Maximum: 2 consecutive calls before you MUST take action + +**How to Use**: +- Provide current_state, analysis, next_actions, and confidence +- Be concise and action-oriented +- Use it to plan, not to procrastinate + +**WARNING**: If you call `think` 2+ times consecutively, it will be BLOCKED. You must take action. + +### respond_to_user +**When to Use**: +- To provide important progress updates +- To ask critical questions that block progress +- To share significant results or findings +- Maximum: 2 consecutive calls before you MUST take action + +**How to Use**: +- message: Clear, informative message +- message_type: One of: update, question, result, error, info +- Be concise and actionable + +**WARNING**: If you call `respond_to_user` 2+ times consecutively, you will be forced to execute. Stop communicating and start working. + +### subtask_done +**When to Use**: After completing a subtask, not before starting it. +**How to Use**: +- task_id: The exact step_id from your plan +- summary: What was accomplished (be specific) +- success: true if completed successfully, false if failed + +**Critical**: Only call this when the subtask is ACTUALLY DONE, not when you're "about to start" or "planning to do it." + +### complete_task +**When to Use**: ONLY when ALL subtasks are completed. +**How to Use**: +- task_id: Usually "main_task" or the original task identifier +- summary: Comprehensive summary of the entire task completion +- success: true if the main task was successful +- results: Detailed results (optional) +- lessons_learned: Key insights (optional) + +**Critical**: Do NOT call this until every subtask shows status "completed" or "failed". + +## LOOP PREVENTION STRATEGIES + +### Thinking Loop Prevention +- **Limit**: Maximum 2 consecutive `think` calls +- **Enforcement**: After 2 calls, `think` tool is BLOCKED +- **Solution**: Take action immediately, use available tools, complete the work + +### Communication Loop Prevention +- **Limit**: Maximum 2 consecutive `respond_to_user` calls +- **Enforcement**: After 2 calls, you're forced to execute +- **Solution**: Stop talking, start working, complete the task + +### Action Encouragement +- After thinking, immediately execute +- After communicating, immediately execute +- Focus on tool usage and task completion +- Mark subtasks as done when finished + +## BEST PRACTICES + +### Planning Best Practices +1. **Break Down Appropriately**: Not too granular (micro-tasks), not too broad (unclear tasks) +2. **Set Priorities Correctly**: Critical tasks are foundational, high tasks are important, medium/low are nice-to-have +3. **Identify Dependencies**: Ensure logical execution order +4. 
**Be Specific**: Each subtask should have a clear, actionable description + +### Execution Best Practices +1. **Think Once, Act Many**: Use `think` briefly, then execute multiple actions +2. **Communicate Sparingly**: Use `respond_to_user` for important updates only +3. **Use Tools Effectively**: Leverage available tools to accomplish work +4. **Complete Before Moving On**: Finish one subtask before starting the next +5. **Mark Progress**: Always call `subtask_done` when a subtask is complete + +### Thinking Best Practices +1. **Be Brief**: Quick assessment, not deep philosophical analysis +2. **Be Action-Oriented**: Focus on what to do next, not just reflection +3. **Move Forward**: After thinking, immediately take action + +### Communication Best Practices +1. **Be Informative**: Share useful information, not fluff +2. **Be Concise**: Get to the point quickly +3. **Be Actionable**: If asking questions, make them specific and necessary +4. **Limit Frequency**: One update per subtask is usually sufficient + +## COMMON PITFALLS TO AVOID + +### ❌ DON'T: +- Call `think` repeatedly without taking action +- Call `respond_to_user` repeatedly without executing +- Call `subtask_done` before actually completing the work +- Call `complete_task` before all subtasks are done +- Get stuck in analysis paralysis +- Over-communicate instead of executing +- Skip the planning phase +- Ignore dependencies in your plan + +### ✅ DO: +- Create a comprehensive plan first +- Think briefly, then act decisively +- Use tools to accomplish actual work +- Complete subtasks before marking them done +- Communicate only when necessary +- Follow the workflow: plan -> think -> action -> subtask_done +- Complete all subtasks before calling `complete_task` +- Provide comprehensive final reports + +## TASK COMPLETION CHECKLIST + +Before calling `complete_task`, verify: +- [ ] All subtasks have been marked as "completed" or "failed" +- [ ] The main task objective has been achieved +- [ ] Results are ready to be shared +- [ ] A comprehensive summary can be provided + +## EXAMPLE WORKFLOW + +**Task**: "Research the top 5 AI companies and create a comparison report" + +**Step 1: Planning** +``` +create_plan( + task_description="Research top 5 AI companies and create comparison report", + steps=[ + {"step_id": "identify_companies", "description": "Identify top 5 AI companies", "priority": "critical"}, + {"step_id": "gather_data", "description": "Gather financial and product data", "priority": "high", "dependencies": ["identify_companies"]}, + {"step_id": "analyze_comparison", "description": "Compare companies across metrics", "priority": "high", "dependencies": ["gather_data"]}, + {"step_id": "create_report", "description": "Write comparison report", "priority": "critical", "dependencies": ["analyze_comparison"]} + ] +) +``` + +**Step 2: Execution - Subtask 1** +``` +think(current_state="Starting first subtask", analysis="Need to identify top AI companies", next_actions=["Use search tools"], confidence=0.9) +[Use search tools to find top AI companies] +subtask_done(task_id="identify_companies", summary="Identified top 5 AI companies: OpenAI, Anthropic, Google DeepMind, Meta AI, Microsoft AI", success=True) +``` + +**Step 3: Execution - Subtask 2** +``` +[Use tools to gather data on each company] +respond_to_user(message="Gathered financial data for all 5 companies", message_type="update") +subtask_done(task_id="gather_data", summary="Collected financial metrics, product information, and market position for all companies", 
success=True) +``` + +**Step 4: Execution - Subtask 3** +``` +[Analyze and compare companies] +subtask_done(task_id="analyze_comparison", summary="Compared companies across revenue, market cap, product offerings, and innovation metrics", success=True) +``` + +**Step 5: Execution - Subtask 4** +``` +[Create comprehensive report] +subtask_done(task_id="create_report", summary="Created detailed comparison report with analysis and recommendations", success=True) +``` + +**Step 6: Completion** +``` +complete_task( + task_id="main_task", + summary="Successfully researched top 5 AI companies and created comprehensive comparison report", + success=True, + results="Report includes detailed analysis of OpenAI, Anthropic, Google DeepMind, Meta AI, and Microsoft AI", + lessons_learned="AI market is rapidly evolving with significant competition" +) +``` + +## FINAL REMINDERS + +1. **Plan First**: Always create a plan before executing +2. **Think Briefly**: Use `think` for quick analysis, not endless reflection +3. **Execute Decisively**: Take concrete actions, use tools, make progress +4. **Communicate Sparingly**: Update users when necessary, but prioritize work +5. **Complete Systematically**: Finish subtasks before marking them done +6. **Finalize Properly**: Only call `complete_task` when everything is finished + +Remember: You are an elite autonomous agent. Your goal is to complete tasks efficiently and effectively. Avoid loops, focus on execution, and deliver exceptional results. + +Now, begin your mission with excellence.""" + + +def get_autonomous_agent_prompt() -> str: + """ + Get the comprehensive autonomous agent system prompt. + + Returns: + str: The full autonomous agent system prompt + """ + return AUTONOMOUS_AGENT_SYSTEM_PROMPT + + +def get_autonomous_agent_prompt_with_context( + agent_name: str = None, + agent_description: str = None, + available_tools: list = None, +) -> str: + """ + Get the autonomous agent prompt with contextual information. + + Args: + agent_name: Name of the agent + agent_description: Description of the agent's role + available_tools: List of available tool names + + Returns: + str: Contextualized autonomous agent prompt + """ + prompt = AUTONOMOUS_AGENT_SYSTEM_PROMPT + + if agent_name: + prompt = prompt.replace( + "You are an elite autonomous agent", + f"You are {agent_name}, an elite autonomous agent", + ) + + if agent_description: + prompt += f"\n\n## AGENT ROLE\n{agent_description}\n" + + if available_tools and len(available_tools) > 0: + tools_list = "\n".join( + [f"- {tool}" for tool in available_tools[:20]] + ) # Limit to 20 tools + prompt += f"\n\n## AVAILABLE TOOLS\nYou have access to the following tools:\n{tools_list}\n" + if len(available_tools) > 20: + prompt += ( + f"\n(and {len(available_tools) - 20} more tools)\n" + ) + prompt += ( + "\nUse these tools effectively to complete your tasks.\n" + ) + + return prompt diff --git a/swarms/structs/debate_with_judge.py b/swarms/structs/debate_with_judge.py index e3104198..73a33a3c 100644 --- a/swarms/structs/debate_with_judge.py +++ b/swarms/structs/debate_with_judge.py @@ -1,13 +1,4 @@ -""" -Debate/Self-Refinement with Judge Architecture - -This module implements a debate architecture where two agents (Pro and Con) -debate a topic, and a Judge agent evaluates their arguments and provides -refined synthesis. The process repeats for N rounds to progressively refine -the answer. 
-""" - -from typing import List, Union +from typing import List, Optional, Union from loguru import logger @@ -18,6 +9,66 @@ from swarms.utils.history_output_formatter import ( ) +# Pre-built system prompts for debate agents +PRO_AGENT_SYSTEM_PROMPT = """You are an expert debater specializing in arguing IN FAVOR of propositions. + +Your Role: +- Present compelling, well-reasoned arguments supporting your assigned position +- Use evidence, logic, and persuasive rhetoric to make your case +- Anticipate and preemptively address potential counterarguments +- Build upon previous arguments when refining your position + +Debate Guidelines: +1. Structure your arguments clearly with main points and supporting evidence +2. Use concrete examples and data when available +3. Acknowledge valid opposing points while explaining why your position is stronger +4. Maintain a professional, respectful tone throughout the debate +5. Focus on the strongest aspects of your position + +Your goal is to present the most compelling case possible for the Pro position.""" + +CON_AGENT_SYSTEM_PROMPT = """You are an expert debater specializing in arguing AGAINST propositions. + +Your Role: +- Present compelling, well-reasoned counter-arguments opposing the given position +- Identify weaknesses, flaws, and potential negative consequences +- Challenge assumptions and evidence presented by the opposing side +- Build upon previous arguments when refining your position + +Debate Guidelines: +1. Structure your counter-arguments clearly with main points and supporting evidence +2. Use concrete examples and data to support your opposition +3. Directly address and refute the Pro's arguments +4. Maintain a professional, respectful tone throughout the debate +5. Focus on the most significant weaknesses of the opposing position + +Your goal is to present the most compelling case possible against the proposition.""" + +JUDGE_AGENT_SYSTEM_PROMPT = """You are an impartial judge and critical evaluator of debates. + +Your Role: +- Objectively evaluate arguments from both Pro and Con sides +- Identify strengths and weaknesses in each position +- Provide constructive feedback for improvement +- Synthesize the best elements from both sides when appropriate +- Render fair verdicts based on argument quality, not personal bias + +Evaluation Criteria: +1. Logical coherence and reasoning quality +2. Evidence and supporting data quality +3. Persuasiveness and rhetorical effectiveness +4. Responsiveness to opposing arguments +5. Overall argument structure and clarity + +Judgment Guidelines: +- Be specific about what makes arguments strong or weak +- Provide actionable feedback for improvement +- When synthesizing, explain how elements from both sides complement each other +- In final rounds, provide clear conclusions with justification + +Your goal is to facilitate productive debate and arrive at well-reasoned conclusions.""" + + class DebateWithJudge: """ A debate architecture with self-refinement through a judge agent. @@ -26,7 +77,7 @@ class DebateWithJudge: 1. Agent A (Pro) and Agent B (Con) present opposing arguments 2. Both arguments are evaluated by a Judge/Critic Agent 3. The Judge provides a winner or synthesis → refined answer - 4. The process repeats for N rounds to progressively improve the answer + 4. The process repeats for N loops to progressively improve the answer Architecture: Agent A (Pro) ↔ Agent B (Con) @@ -37,61 +88,209 @@ class DebateWithJudge: ▼ Winner or synthesis → refined answer + Initialization Options: + 1. 
Provide individual agents: pro_agent, con_agent, judge_agent + 2. Provide a list of agents: agents=[pro, con, judge] + 3. Use preset agents: preset_agents=True (creates default agents automatically) + Attributes: pro_agent (Agent): The agent arguing in favor (Pro position). con_agent (Agent): The agent arguing against (Con position). judge_agent (Agent): The judge agent that evaluates arguments and provides synthesis. - max_rounds (int): Maximum number of debate rounds to execute. + max_loops (int): Maximum number of debate loops to execute. output_type (str): Format for the output conversation history. verbose (bool): Whether to enable verbose logging. + + Examples: + >>> # Using preset agents (simplest approach) + >>> debate = DebateWithJudge(preset_agents=True, max_loops=3) + >>> result = debate.run("Should AI be regulated?") + + >>> # Using a list of agents + >>> agents = [pro_agent, con_agent, judge_agent] + >>> debate = DebateWithJudge(agents=agents, max_loops=3) + >>> result = debate.run("Is remote work better than office work?") + + >>> # Using individual agent parameters + >>> debate = DebateWithJudge( + ... pro_agent=my_pro_agent, + ... con_agent=my_con_agent, + ... judge_agent=my_judge_agent + ... ) + >>> result = debate.run("Should we colonize Mars?") """ def __init__( self, - pro_agent: Agent, - con_agent: Agent, - judge_agent: Agent, - max_rounds: int = 3, + pro_agent: Optional[Agent] = None, + con_agent: Optional[Agent] = None, + judge_agent: Optional[Agent] = None, + agents: Optional[List[Agent]] = None, + preset_agents: bool = False, + max_loops: int = 3, output_type: str = "str-all-except-first", verbose: bool = True, + model_name: str = "gpt-4o-mini", ): """ Initialize the DebateWithJudge architecture. Args: - pro_agent (Agent): The agent arguing in favor (Pro position). - con_agent (Agent): The agent arguing against (Con position). - judge_agent (Agent): The judge agent that evaluates arguments and provides synthesis. - max_rounds (int): Maximum number of debate rounds to execute. Defaults to 3. - output_type (str): Format for the output conversation history. Defaults to "str-all-except-first". + pro_agent (Optional[Agent]): The agent arguing in favor (Pro position). + Not required if using agents list or preset_agents. + con_agent (Optional[Agent]): The agent arguing against (Con position). + Not required if using agents list or preset_agents. + judge_agent (Optional[Agent]): The judge agent that evaluates arguments. + Not required if using agents list or preset_agents. + agents (Optional[List[Agent]]): A list of exactly 3 agents in order: + [pro_agent, con_agent, judge_agent]. Takes precedence over individual + agent parameters if provided. + preset_agents (bool): If True, creates default pro, con, and judge agents + automatically. Used when no agents are provided. Defaults to False. + max_loops (int): Maximum number of debate loops to execute. Defaults to 3. + output_type (str): Format for the output conversation history. + Defaults to "str-all-except-first". verbose (bool): Whether to enable verbose logging. Defaults to True. + model_name (str): The model name to use for preset agents. + Defaults to "gpt-4o-mini". Raises: - ValueError: If any of the required agents are None or if max_rounds is less than 1. + ValueError: If no valid agent configuration is provided (no agents, no list, + and preset_agents is False), if agents list doesn't have exactly 3 agents, + or if max_loops is less than 1. 
""" - if pro_agent is None: - raise ValueError("pro_agent cannot be None") - if con_agent is None: - raise ValueError("con_agent cannot be None") - if judge_agent is None: - raise ValueError("judge_agent cannot be None") - if max_rounds < 1: - raise ValueError("max_rounds must be at least 1") - - self.pro_agent = pro_agent - self.con_agent = con_agent - self.judge_agent = judge_agent - self.max_rounds = max_rounds + if max_loops < 1: + raise ValueError("max_loops must be at least 1") + + self.max_loops = max_loops self.output_type = output_type self.verbose = verbose + self.model_name = model_name + + # Determine agent configuration + self._configure_agents( + pro_agent=pro_agent, + con_agent=con_agent, + judge_agent=judge_agent, + agents=agents, + preset_agents=preset_agents, + ) # Initialize conversation history self.conversation = Conversation() if self.verbose: logger.info( - f"DebateWithJudge initialized with {max_rounds} rounds" + f"DebateWithJudge initialized with {max_loops} loops" ) + logger.info( + f"Pro Agent: {self.pro_agent.agent_name}, " + f"Con Agent: {self.con_agent.agent_name}, " + f"Judge Agent: {self.judge_agent.agent_name}" + ) + + def _configure_agents( + self, + pro_agent: Optional[Agent], + con_agent: Optional[Agent], + judge_agent: Optional[Agent], + agents: Optional[List[Agent]], + preset_agents: bool, + ) -> None: + """ + Configure agents based on provided parameters. + + Priority order: + 1. agents list (if provided and valid) + 2. Individual agent parameters (if all provided) + 3. preset_agents (if True) + + Args: + pro_agent: The pro agent (optional). + con_agent: The con agent (optional). + judge_agent: The judge agent (optional). + agents: List of agents [pro, con, judge] (optional). + preset_agents: Whether to create default agents. + + Raises: + ValueError: If no valid configuration is provided. + """ + # Option 1: Use agents list + if agents is not None: + if len(agents) != 3: + raise ValueError( + f"agents list must contain exactly 3 agents " + f"[pro_agent, con_agent, judge_agent], got {len(agents)}" + ) + for i, agent in enumerate(agents): + if not isinstance(agent, Agent): + raise ValueError( + f"agents[{i}] must be an Agent instance, got {type(agent)}" + ) + self.pro_agent = agents[0] + self.con_agent = agents[1] + self.judge_agent = agents[2] + if self.verbose: + logger.info("Using agents from provided list") + return + + # Option 2: Use individual agent parameters + if pro_agent is not None and con_agent is not None and judge_agent is not None: + self.pro_agent = pro_agent + self.con_agent = con_agent + self.judge_agent = judge_agent + if self.verbose: + logger.info("Using individually provided agents") + return + + # Option 3: Create preset agents + if preset_agents: + self._create_preset_agents() + if self.verbose: + logger.info("Using preset agents") + return + + # No valid configuration + raise ValueError( + "No valid agent configuration provided. Either:\n" + "1. Provide all three agents: pro_agent, con_agent, judge_agent\n" + "2. Provide an agents list with exactly 3 agents: agents=[pro, con, judge]\n" + "3. Set preset_agents=True to use default agents" + ) + + def _create_preset_agents(self) -> None: + """ + Create preset agents with default configurations. + + Creates three agents (Pro, Con, Judge) with predefined system prompts + optimized for debate scenarios. 
+ """ + self.pro_agent = Agent( + agent_name="Pro-Debater", + agent_description="Expert debater arguing in favor of propositions", + system_prompt=PRO_AGENT_SYSTEM_PROMPT, + model_name=self.model_name, + max_loops=1, + verbose=self.verbose, + ) + + self.con_agent = Agent( + agent_name="Con-Debater", + agent_description="Expert debater arguing against propositions", + system_prompt=CON_AGENT_SYSTEM_PROMPT, + model_name=self.model_name, + max_loops=1, + verbose=self.verbose, + ) + + self.judge_agent = Agent( + agent_name="Debate-Judge", + agent_description="Impartial judge evaluating debate arguments", + system_prompt=JUDGE_AGENT_SYSTEM_PROMPT, + model_name=self.model_name, + max_loops=1, + verbose=self.verbose, + ) def run(self, task: str) -> Union[str, List, dict]: """ @@ -119,11 +318,11 @@ class DebateWithJudge: if self.verbose: logger.info(f"Starting debate on: {task}") - # Execute N rounds of debate and refinement - for round_num in range(self.max_rounds): + # Execute N loops of debate and refinement + for round_num in range(self.max_loops): if self.verbose: logger.info( - f"Round {round_num + 1}/{self.max_rounds}" + f"Loop {round_num + 1}/{self.max_loops}" ) # Step 1: Pro agent presents argument @@ -164,7 +363,7 @@ class DebateWithJudge: f"Judge synthesis: {judge_synthesis[:100]}..." ) - # Use judge's synthesis as input for next round + # Use judge's synthesis as input for next loop current_topic = judge_synthesis # Return formatted output @@ -210,7 +409,7 @@ class DebateWithJudge: f"and weaknesses, and provide a refined synthesis that incorporates the " f"best elements from both sides. You may declare a winner or provide a " f"balanced synthesis. Your output will be used to refine the discussion " - f"in subsequent rounds." + f"in subsequent loops." ) self.judge_agent.run(task=judge_intro) @@ -220,7 +419,7 @@ class DebateWithJudge: Args: topic (str): The current topic or refined question. - round_num (int): The current round number (0-indexed). + round_num (int): The current loop number (0-indexed). Returns: str: The prompt for the Pro agent. @@ -232,7 +431,7 @@ class DebateWithJudge: ) else: return ( - f"Round {round_num + 1}: Based on the judge's previous evaluation, " + f"Loop {round_num + 1}: Based on the judge's previous evaluation, " f"present an improved argument in favor of: {topic}\n\n" f"Address any weaknesses identified and strengthen your position " f"with additional evidence and reasoning." @@ -247,7 +446,7 @@ class DebateWithJudge: Args: topic (str): The current topic or refined question. pro_argument (str): The Pro agent's argument to counter. - round_num (int): The current round number (0-indexed). + round_num (int): The current loop number (0-indexed). Returns: str: The prompt for the Con agent. @@ -261,7 +460,7 @@ class DebateWithJudge: ) else: return ( - f"Round {round_num + 1}: Based on the judge's previous evaluation, " + f"Loop {round_num + 1}: Based on the judge's previous evaluation, " f"present an improved counter-argument against: {topic}\n\n" f"Pro's current argument:\n{pro_argument}\n\n" f"Address any weaknesses identified and strengthen your counter-position " @@ -282,22 +481,22 @@ class DebateWithJudge: topic (str): The current topic or refined question. pro_argument (str): The Pro agent's argument. con_argument (str): The Con agent's argument. - round_num (int): The current round number (0-indexed). + round_num (int): The current loop number (0-indexed). Returns: str: The prompt for the Judge agent. 
""" - is_final_round = round_num == self.max_rounds - 1 + is_final_round = round_num == self.max_loops - 1 prompt = ( - f"Round {round_num + 1}/{self.max_rounds}: Evaluate the debate on: {topic}\n\n" + f"Loop {round_num + 1}/{self.max_loops}: Evaluate the debate on: {topic}\n\n" f"Pro's argument ({self.pro_agent.agent_name}):\n{pro_argument}\n\n" f"Con's argument ({self.con_agent.agent_name}):\n{con_argument}\n\n" ) if is_final_round: prompt += ( - "This is the final round. Provide a comprehensive final evaluation:\n" + "This is the final loop. Provide a comprehensive final evaluation:\n" "- Identify the strongest points from both sides\n" "- Determine a winner OR provide a balanced synthesis\n" "- Present a refined, well-reasoned answer that incorporates the best " @@ -309,8 +508,8 @@ class DebateWithJudge: "Evaluate both arguments and provide:\n" "- Assessment of strengths and weaknesses in each argument\n" "- A refined synthesis that incorporates the best elements from both sides\n" - "- Specific feedback for improvement in the next round\n" - "- Your synthesis will be used as the topic for the next round" + "- Specific feedback for improvement in the next loop\n" + "- Your synthesis will be used as the topic for the next loop" ) return prompt diff --git a/swarms/structs/hiearchical_swarm.py b/swarms/structs/hiearchical_swarm.py index 8b8f8fdf..407b7d58 100644 --- a/swarms/structs/hiearchical_swarm.py +++ b/swarms/structs/hiearchical_swarm.py @@ -37,8 +37,8 @@ from rich.text import Text from rich.tree import Tree from swarms.prompts.hiearchical_system_prompt import ( - HIEARCHICAL_SWARM_SYSTEM_PROMPT, DIRECTOR_PLANNING_PROMPT, + HIEARCHICAL_SWARM_SYSTEM_PROMPT, ) from swarms.prompts.multi_agent_collab_prompt import ( MULTI_AGENT_COLLAB_PROMPT_TWO, diff --git a/swarms/structs/maker.py b/swarms/structs/maker.py new file mode 100644 index 00000000..ca0c0a1e --- /dev/null +++ b/swarms/structs/maker.py @@ -0,0 +1,1044 @@ +""" +MAKER: Massively decomposed Agentic processes with first-to-ahead-by-K Error correction and Red-flagging + +This module implements the MAKER framework from the paper: +"Solving a Million-Step LLM Task with Zero Errors" by Meyerson et al. (2025) + +MAKER is a general-purpose framework for solving long-horizon tasks with extreme precision through: +1. MAD (Maximal Agentic Decomposition): Breaking tasks into minimal subtasks +2. First-to-ahead-by-K Voting: Error correction through voting +3. Red-flagging: Discarding unreliable responses + +The framework enables solving tasks with millions of LLM steps with zero errors +by exploiting the modularity of extreme decomposition to apply error correction +at each step. + +Paper: https://arxiv.org/abs/2511.09030 +""" + +import uuid +import math +import concurrent.futures +from typing import Any, Callable, Dict, List, Optional, Tuple + +from swarms.structs.agent import Agent +from swarms.structs.conversation import Conversation +from swarms.utils.loguru_logger import initialize_logger + +logger = initialize_logger(log_folder="maker") + + +class MAKER: + """ + MAKER: Maximal Agentic decomposition, first-to-ahead-by-K Error correction, and Red-flagging. + + A general-purpose framework for solving long-horizon tasks with extreme precision + through massive decomposition of tasks into subtasks, each solved by focused + microagents with error correction through voting. + + This implementation follows the MAKER framework from the paper: + "Solving a Million-Step LLM Task with Zero Errors" by Meyerson et al. 
(2025) + + The framework consists of three core components: + + 1. MAD (Maximal Agentic Decomposition): + By breaking a task with s steps into s subtasks, each agent can focus on a + single step, reducing context confusion and improving reliability. + + 2. First-to-ahead-by-K Voting: + For each step, multiple samples are drawn until one candidate action is + K votes ahead of all others, ensuring high probability of correctness. + + 3. Red-flagging: + Responses that show signs of unreliability (overly long or incorrectly + formatted) are discarded, reducing correlated errors. + + The framework is task-agnostic. Users provide: + - A task/objective to complete (main input to run()) + - A function to format prompts for each step + - A function to parse responses and extract the action/result + - A function to validate responses (for red-flagging) + - Optional: A function to update state between steps + + Attributes: + id (str): Unique identifier for the MAKER instance. + name (str): Human-readable name for the system. + description (str): Description of the system's purpose. + model_name (str): Name of the LLM model to use. + k (int): Vote threshold - candidate must be k votes ahead to win. + max_tokens (int): Maximum tokens for LLM response (red-flag threshold). + temperature (float): Temperature for LLM sampling. + temperature_first (float): Temperature for first vote (typically 0 for determinism). + system_prompt (str): System prompt for the microagents. + format_prompt (Callable): Function to format the prompt for each step. + parse_response (Callable): Function to parse LLM response into a result. + validate_response (Callable): Function to validate response format (red-flagging). + update_state (Callable): Function to update state after each step. + max_workers (int): Maximum parallel workers for concurrent sampling. + verbose (bool): Whether to enable verbose logging. + + Example: + >>> from swarms.structs.maker import MAKER + >>> + >>> # Define task-specific functions + >>> def format_prompt(task, state, step_idx, previous_result): + ... return f"Task: {task}\\nState: {state}\\nStep {step_idx+1}: What's next?" + >>> + >>> def parse_response(response): + ... return response.strip() + >>> + >>> def validate_response(response, max_tokens): + ... return len(response) < max_tokens * 4 and response.strip() != "" + >>> + >>> # Create MAKER instance + >>> maker = MAKER( + ... name="MyTaskSolver", + ... model_name="gpt-4o-mini", + ... system_prompt="You solve tasks step by step.", + ... format_prompt=format_prompt, + ... parse_response=parse_response, + ... validate_response=validate_response, + ... k=3, + ... ) + >>> + >>> # Run the solver with your task + >>> results = maker.run( + ... task="Calculate the factorial of 5 step by step", + ... max_steps=5 + ... ) + + References: + Meyerson, E., et al. (2025). Solving a Million-Step LLM Task with Zero Errors. + arXiv:2511.09030 + """ + + def __init__( + self, + id: str = None, + name: str = "MAKER", + description: str = "Massively decomposed Agentic processes with Error correction and Red-flagging", + model_name: str = "gpt-4o-mini", + system_prompt: str = "You are a precise assistant that solves tasks step by step. 
Follow instructions exactly and provide clear, structured outputs.", + k: int = 3, + max_tokens: int = 1024, + temperature: float = 0.1, + temperature_first: float = 0.0, + format_prompt: Callable[[str, Any, int, Any], str] = None, + parse_response: Callable[[str], Any] = None, + validate_response: Callable[[str, int], bool] = None, + update_state: Callable[[Any, Any, int], Any] = None, + initial_state: Any = None, + max_workers: int = None, + verbose: bool = True, + max_retries_per_step: int = 100, + agents: List[Agent] = None, + ): + """ + Initialize the MAKER framework. + + Args: + id: Unique identifier for the MAKER instance. Auto-generated if not provided. + name: Human-readable name for the system. + description: Description of the system's purpose. + model_name: Name of the LLM model to use (e.g., "gpt-4o-mini", "gpt-4.1-mini"). + system_prompt: System prompt for the microagents. Should describe the task domain + and expected output format. + k: Vote threshold - a candidate must be k votes ahead of all others to win. + Higher k means more reliability but higher cost. Typical values: 2-5. + max_tokens: Maximum tokens for LLM response. Responses exceeding this are + red-flagged as the model may be confused. + temperature: Temperature for LLM sampling (used for votes after the first). + Lower values (0.1-0.3) provide more consistent results. + temperature_first: Temperature for first vote. Using 0 ensures the best + deterministic guess is included in the vote set. + format_prompt: Function(task, state, step_idx, previous_result) -> str that formats + the prompt for each step. The task is the main objective passed to run(). + If None, uses a simple default. + parse_response: Function(response_text) -> result that extracts the result + from the LLM response. The result must be hashable for voting. + If None, returns the stripped response text. + validate_response: Function(response_text, max_tokens) -> bool that validates + the response format. Returns True if valid, False to red-flag. + If None, only checks response length. + update_state: Function(current_state, result, step_idx) -> new_state that + updates the state after each step. If None, state is unchanged. + initial_state: Initial state for the task. Can be any type depending on your task. + max_workers: Maximum parallel workers for concurrent vote sampling. + If None, uses k as the number of workers. + verbose: Whether to enable verbose logging. + max_retries_per_step: Maximum retries per step before raising an error. + agents: Optional list of pre-configured agents to use instead of creating new ones. + If provided, agents will be cycled through for each vote. 
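+
+        Example:
+            A minimal construction sketch; the argument values below are
+            illustrative, and ``agent_a``/``agent_b`` are placeholder agents:
+
+            >>> maker = MAKER(k=3)  # fresh microagents are created for each vote
+            >>> maker = MAKER(k=3, agents=[agent_a, agent_b])  # cycle through a supplied pool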
+ """ + self.id = id if id is not None else str(uuid.uuid4()) + self.name = name + self.description = description + self.model_name = model_name + self.system_prompt = system_prompt + self.k = k + self.max_tokens = max_tokens + self.temperature = temperature + self.temperature_first = temperature_first + self.max_workers = max_workers if max_workers is not None else k + self.verbose = verbose + self.max_retries_per_step = max_retries_per_step + self.agents = agents + self.initial_state = initial_state + + # Task-specific functions with defaults + self.format_prompt = ( + format_prompt + if format_prompt is not None + else self._default_format_prompt + ) + self.parse_response = ( + parse_response + if parse_response is not None + else self._default_parse_response + ) + self.validate_response = ( + validate_response + if validate_response is not None + else self._default_validate_response + ) + self.update_state = ( + update_state + if update_state is not None + else self._default_update_state + ) + + # Initialize conversation tracker + self.conversation = Conversation( + name=f"maker_{self.name}_{self.id}" + ) + + # Statistics tracking + self.stats = { + "total_samples": 0, + "total_votes": 0, + "red_flagged": 0, + "steps_completed": 0, + "votes_per_step": [], + "samples_per_step": [], + } + + # Validate configuration + self._validate_config() + + if self.verbose: + logger.info(f"MAKER initialized: {self.name}") + logger.info( + f"Model: {self.model_name}, k={self.k}, max_tokens={self.max_tokens}" + ) + + def _validate_config(self): + """ + Validate the MAKER configuration. + + Raises: + ValueError: If configuration is invalid. + """ + if self.k < 1: + raise ValueError("k must be at least 1") + if self.max_tokens < 10: + raise ValueError("max_tokens must be at least 10") + if self.temperature < 0 or self.temperature > 2: + raise ValueError("temperature must be between 0 and 2") + if self.max_retries_per_step < 1: + raise ValueError("max_retries_per_step must be at least 1") + + def _default_format_prompt( + self, task: str, state: Any, step_idx: int, previous_result: Any + ) -> str: + """ + Default prompt formatter. + + Args: + task: The main task/objective to complete. + state: Current state of the task. + step_idx: Current step index (0-based). + previous_result: Result from the previous step (None for first step). + + Returns: + Formatted prompt string. + """ + prompt_parts = [f"Task: {task}", f"Step {step_idx + 1}:"] + + if state is not None: + prompt_parts.insert(1, f"Current state: {state}") + + if previous_result is not None: + prompt_parts.insert(-1, f"Previous result: {previous_result}") + + prompt_parts.append("Provide the result for this step.") + + return "\n".join(prompt_parts) + + def _default_parse_response(self, response_text: str) -> str: + """ + Default response parser. + + Args: + response_text: Raw LLM response. + + Returns: + Stripped response text as the result. + """ + return response_text.strip() + + def _default_validate_response( + self, response_text: str, max_tokens: int + ) -> bool: + """ + Default response validator (red-flagging). + + Args: + response_text: Raw LLM response. + max_tokens: Maximum allowed tokens. + + Returns: + True if response is valid, False to red-flag. 
+ """ + # Estimate tokens (rough: 4 chars per token) + estimated_tokens = len(response_text) // 4 + + # Red-flag if too long + if estimated_tokens > max_tokens: + return False + + # Red-flag if empty + if not response_text.strip(): + return False + + return True + + def _default_update_state( + self, state: Any, result: Any, step_idx: int + ) -> Any: + """ + Default state update function (no-op). + + Args: + state: Current state. + result: Result from current step. + step_idx: Current step index. + + Returns: + Unchanged state. + """ + return state + + def _create_microagent(self, temperature: float = None) -> Agent: + """ + Create a focused microagent for a single step. + + Each microagent has minimal context and is focused on solving + exactly one step of the problem. + + Args: + temperature: Temperature for this agent's sampling. + + Returns: + An Agent instance configured for single-step execution. + """ + temp = temperature if temperature is not None else self.temperature + + agent = Agent( + agent_name=f"MAKER-MicroAgent-{uuid.uuid4().hex[:8]}", + agent_description="Focused microagent for single-step execution in MAKER framework", + system_prompt=self.system_prompt, + model_name=self.model_name, + max_tokens=self.max_tokens, + temperature=temp, + max_loops=1, + verbose=False, + print_on=False, + output_type="str-all-except-first", + ) + + return agent + + def _get_agent(self, temperature: float = None) -> Agent: + """ + Get an agent for voting. + + If agents were provided, returns one from the pool. + Otherwise, creates a new microagent. + + Args: + temperature: Temperature for agent sampling. + + Returns: + An Agent instance. + """ + if self.agents is not None and len(self.agents) > 0: + # Cycle through provided agents + agent_idx = self.stats["total_samples"] % len(self.agents) + return self.agents[agent_idx] + else: + return self._create_microagent(temperature) + + def _make_hashable(self, result: Any) -> Any: + """ + Convert a result to a hashable type for voting. + + Args: + result: The result to convert. + + Returns: + A hashable version of the result. + """ + if isinstance(result, (str, int, float, bool, type(None))): + return result + elif isinstance(result, (list, tuple)): + return tuple(self._make_hashable(item) for item in result) + elif isinstance(result, dict): + return tuple( + sorted( + (k, self._make_hashable(v)) for k, v in result.items() + ) + ) + elif isinstance(result, set): + return frozenset(self._make_hashable(item) for item in result) + else: + # Fall back to string representation + return str(result) + + def _unhash_result(self, hashable: Any, original_type: type) -> Any: + """ + Convert a hashable result back to its original type. + + Args: + hashable: The hashable result. + original_type: The original type of the result. + + Returns: + The result in its original type. + """ + if original_type in (str, int, float, bool, type(None)): + return hashable + elif original_type is list: + return list(hashable) if isinstance(hashable, tuple) else hashable + elif original_type is dict: + return dict(hashable) if isinstance(hashable, tuple) else hashable + elif original_type is set: + return set(hashable) if isinstance(hashable, frozenset) else hashable + else: + return hashable + + def get_vote( + self, + task: str, + state: Any, + step_idx: int, + previous_result: Any = None, + temperature: float = None, + ) -> Optional[Tuple[Any, str, type]]: + """ + Get a single vote for the current step. + + Samples from the LLM and applies red-flagging. 
If the response has + red flags, returns None (the vote is discarded). + + This implements Algorithm 3 (get_vote) from the paper. + + Args: + task: The main task/objective being solved. + state: Current state of the task. + step_idx: Current step index. + previous_result: Result from previous step. + temperature: Temperature for sampling. + + Returns: + Tuple of (hashable_result, raw_response, original_type) if valid, + None if red-flagged. + """ + self.stats["total_samples"] += 1 + + agent = self._get_agent(temperature) + prompt = self.format_prompt(task, state, step_idx, previous_result) + + try: + response = agent.run(task=prompt) + + # Red-flag check + if not self.validate_response(response, self.max_tokens): + self.stats["red_flagged"] += 1 + if self.verbose: + logger.debug(f"Red-flagged response at step {step_idx + 1}") + return None + + # Parse the response + result = self.parse_response(response) + original_type = type(result) + + # Convert to hashable for voting + hashable_result = self._make_hashable(result) + + self.stats["total_votes"] += 1 + return (hashable_result, response, original_type) + + except Exception as e: + self.stats["red_flagged"] += 1 + if self.verbose: + logger.debug( + f"Red-flagged response at step {step_idx + 1} (exception: {e})" + ) + return None + + def do_voting( + self, + task: str, + state: Any, + step_idx: int, + previous_result: Any = None, + ) -> Tuple[Any, str]: + """ + Perform first-to-ahead-by-k voting for the current step. + + Samples votes until one candidate result is k votes ahead of all others. + This provides statistical error correction by requiring consensus. + + This implements Algorithm 2 (do_voting) from the paper. + + Args: + task: The main task/objective being solved. + state: Current state of the task. + step_idx: Current step index. + previous_result: Result from previous step. + + Returns: + Tuple of (result, raw_response) for the winning candidate. + + Raises: + RuntimeError: If max_retries_per_step is exceeded without finding a winner. + """ + votes = {} # hashable_result -> vote count + responses = {} # hashable_result -> raw_response + original_types = {} # hashable_result -> original_type + samples_this_step = 0 + votes_this_step = 0 + is_first_vote = True + + while samples_this_step < self.max_retries_per_step: + # Use temperature 0 for first vote, then configured temperature + temp = self.temperature_first if is_first_vote else self.temperature + is_first_vote = False + + # Get a vote + result = self.get_vote(task, state, step_idx, previous_result, temp) + samples_this_step += 1 + + if result is None: + # Red-flagged, try again + continue + + hashable_result, response, original_type = result + votes_this_step += 1 + + # Update vote count + if hashable_result not in votes: + votes[hashable_result] = 0 + responses[hashable_result] = response + original_types[hashable_result] = original_type + votes[hashable_result] += 1 + + # Check if we have a winner (first-to-ahead-by-k) + current_count = votes[hashable_result] + max_other = max( + (v for r, v in votes.items() if r != hashable_result), + default=0, + ) + + if current_count >= max_other + self.k: + # We have a winner! 
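+                    # First-to-ahead-by-k stopping rule: the candidate that just
+                    # received a vote now leads every other candidate by at least
+                    # k votes (current_count >= max_other + self.k). Illustrative
+                    # tally, not from the paper: with k=3 and {A: 5, B: 2}, A wins
+                    # because 5 >= 2 + 3.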
+ self.stats["votes_per_step"].append(votes_this_step) + self.stats["samples_per_step"].append(samples_this_step) + + if self.verbose: + logger.debug( + f"Step {step_idx + 1} decided with {votes_this_step} votes " + f"({samples_this_step} samples, winner: {current_count} votes)" + ) + + # Convert back to original type + final_result = self._unhash_result( + hashable_result, original_types[hashable_result] + ) + return final_result, responses[hashable_result] + + # If we get here, we've exceeded max retries + raise RuntimeError( + f"Step {step_idx + 1}: Failed to reach consensus after " + f"{self.max_retries_per_step} samples. Vote distribution: {votes}" + ) + + def run(self, task: str, max_steps: int = None) -> List[Any]: + """ + Run the MAKER framework to solve the given task. + + Executes the complete solution process, generating results step-by-step + using maximal decomposition with error correction through voting. + + This implements Algorithm 1 (generate_solution) from the paper. + + Args: + task: The main task/objective to complete. This is the primary input + that defines what the MAKER framework should solve. + max_steps: Number of steps to execute. Required parameter. + + Returns: + List of results from each step. + + Raises: + ValueError: If task is not provided or max_steps is not specified. + RuntimeError: If voting fails on any step. + + Example: + >>> maker = MAKER( + ... system_prompt="Solve math problems step by step.", + ... k=3 + ... ) + >>> results = maker.run( + ... task="Calculate 2^10 by doubling, starting from 2", + ... max_steps=9 + ... ) + """ + if not task: + raise ValueError("task is required - this is the objective to complete") + if max_steps is None: + raise ValueError("max_steps is required - specify how many steps to execute") + + if self.verbose: + logger.info(f"Starting MAKER with {max_steps} steps, k={self.k}") + logger.info(f"Task: {task[:100]}..." if len(task) > 100 else f"Task: {task}") + + # Initialize state + state = self.initial_state + + results = [] + previous_result = None + + for step_idx in range(max_steps): + if self.verbose and (step_idx + 1) % max(1, max_steps // 10) == 0: + logger.info(f"Progress: {step_idx + 1}/{max_steps} steps completed") + + # Do voting for this step + result, response = self.do_voting(task, state, step_idx, previous_result) + + # Record the result + results.append(result) + + # Update state + state = self.update_state(state, result, step_idx) + previous_result = result + + self.stats["steps_completed"] = step_idx + 1 + + # Log to conversation + self.conversation.add( + role=f"Step-{step_idx + 1}", + content=f"Result: {result}", + ) + + if self.verbose: + self._log_statistics() + + return results + + def run_until_condition( + self, + task: str, + stop_condition: Callable[[Any, List[Any], int], bool], + max_steps: int = 1000, + ) -> List[Any]: + """ + Run MAKER until a stopping condition is met. + + Useful for tasks where the number of steps is not known in advance. + + Args: + task: The main task/objective to complete. + stop_condition: Function(current_state, results, step_idx) -> bool + that returns True when the task is complete. + max_steps: Maximum steps to prevent infinite loops. + + Returns: + List of results from each step. + + Example: + >>> def is_complete(state, results, step_idx): + ... return "DONE" in str(results[-1]) if results else False + >>> + >>> maker = MAKER(system_prompt="...", k=3) + >>> results = maker.run_until_condition( + ... task="Solve this problem until you reach the answer", + ... 
stop_condition=is_complete, + ... max_steps=100 + ... ) + """ + if not task: + raise ValueError("task is required - this is the objective to complete") + if stop_condition is None: + raise ValueError("stop_condition must be provided") + + state = self.initial_state + + if self.verbose: + logger.info(f"Starting MAKER (conditional), max_steps={max_steps}, k={self.k}") + logger.info(f"Task: {task[:100]}..." if len(task) > 100 else f"Task: {task}") + + results = [] + previous_result = None + + for step_idx in range(max_steps): + # Check stop condition + if stop_condition(state, results, step_idx): + if self.verbose: + logger.info(f"Stop condition met at step {step_idx + 1}") + break + + if self.verbose and (step_idx + 1) % 10 == 0: + logger.info(f"Progress: {step_idx + 1} steps completed") + + # Do voting for this step + result, response = self.do_voting(task, state, step_idx, previous_result) + + results.append(result) + state = self.update_state(state, result, step_idx) + previous_result = result + self.stats["steps_completed"] = step_idx + 1 + + if self.verbose: + self._log_statistics() + + return results + + def run_parallel_voting(self, task: str, max_steps: int = None) -> List[Any]: + """ + Run MAKER with parallel vote sampling. + + An optimized version that samples k votes in parallel for each step, + which can significantly reduce wall-clock time while maintaining + the same error correction guarantees. + + Args: + task: The main task/objective to complete. + max_steps: Number of steps to execute. + + Returns: + List of results from each step. + """ + if not task: + raise ValueError("task is required - this is the objective to complete") + if max_steps is None: + raise ValueError("max_steps is required - specify how many steps to execute") + + state = self.initial_state + + if self.verbose: + logger.info(f"Starting MAKER (parallel) with {max_steps} steps, k={self.k}") + logger.info(f"Task: {task[:100]}..." if len(task) > 100 else f"Task: {task}") + + results = [] + previous_result = None + + for step_idx in range(max_steps): + if self.verbose and (step_idx + 1) % max(1, max_steps // 10) == 0: + logger.info(f"Progress: {step_idx + 1}/{max_steps} steps completed") + + result, response = self._do_voting_parallel( + task, state, step_idx, previous_result + ) + + results.append(result) + state = self.update_state(state, result, step_idx) + previous_result = result + self.stats["steps_completed"] = step_idx + 1 + + if self.verbose: + self._log_statistics() + + return results + + def _do_voting_parallel( + self, + task: str, + state: Any, + step_idx: int, + previous_result: Any = None, + ) -> Tuple[Any, str]: + """ + Parallel voting implementation. + + Samples k votes in parallel, then continues with sequential sampling + if no winner is found. + + Args: + task: The main task/objective being solved. + state: Current state of the task. + step_idx: Current step index. + previous_result: Result from previous step. + + Returns: + Tuple of (result, raw_response). 
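+
+        Note:
+            The first batch of k votes is sampled in parallel (the first at
+            temperature_first, the rest at temperature); red-flagged samples
+            are simply dropped. If no candidate is k votes ahead after the
+            batch, sampling continues sequentially until a winner emerges or
+            max_retries_per_step is exhausted.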
+ """ + votes = {} + responses = {} + original_types = {} + samples_this_step = 0 + votes_this_step = 0 + + # First round: sample k votes in parallel + with concurrent.futures.ThreadPoolExecutor( + max_workers=self.max_workers + ) as executor: + # First vote with temperature 0, rest with configured temperature + futures = [] + futures.append( + executor.submit( + self.get_vote, + task, + state, + step_idx, + previous_result, + self.temperature_first, + ) + ) + for _ in range(self.k - 1): + futures.append( + executor.submit( + self.get_vote, + task, + state, + step_idx, + previous_result, + self.temperature, + ) + ) + + for future in concurrent.futures.as_completed(futures): + samples_this_step += 1 + result = future.result() + if result is not None: + hashable_result, response, original_type = result + votes_this_step += 1 + if hashable_result not in votes: + votes[hashable_result] = 0 + responses[hashable_result] = response + original_types[hashable_result] = original_type + votes[hashable_result] += 1 + + # Check if we have a winner, continue sequentially if not + while samples_this_step < self.max_retries_per_step: + if votes: + leader = max(votes, key=votes.get) + leader_count = votes[leader] + max_other = max( + (v for r, v in votes.items() if r != leader), + default=0, + ) + + if leader_count >= max_other + self.k: + self.stats["votes_per_step"].append(votes_this_step) + self.stats["samples_per_step"].append(samples_this_step) + + final_result = self._unhash_result( + leader, original_types[leader] + ) + return final_result, responses[leader] + + # No winner yet, get more votes sequentially + result = self.get_vote( + task, state, step_idx, previous_result, self.temperature + ) + samples_this_step += 1 + + if result is not None: + hashable_result, response, original_type = result + votes_this_step += 1 + if hashable_result not in votes: + votes[hashable_result] = 0 + responses[hashable_result] = response + original_types[hashable_result] = original_type + votes[hashable_result] += 1 + + raise RuntimeError( + f"Step {step_idx + 1}: Failed to reach consensus after " + f"{self.max_retries_per_step} samples" + ) + + def _log_statistics(self): + """Log execution statistics.""" + logger.info("=" * 50) + logger.info("MAKER Execution Statistics") + logger.info("=" * 50) + logger.info(f"Steps completed: {self.stats['steps_completed']}") + logger.info(f"Total samples: {self.stats['total_samples']}") + logger.info(f"Total valid votes: {self.stats['total_votes']}") + logger.info(f"Red-flagged responses: {self.stats['red_flagged']}") + + if self.stats["votes_per_step"]: + avg_votes = sum(self.stats["votes_per_step"]) / len( + self.stats["votes_per_step"] + ) + max_votes = max(self.stats["votes_per_step"]) + logger.info(f"Average votes per step: {avg_votes:.2f}") + logger.info(f"Max votes for a step: {max_votes}") + + if self.stats["samples_per_step"]: + avg_samples = sum(self.stats["samples_per_step"]) / len( + self.stats["samples_per_step"] + ) + logger.info(f"Average samples per step: {avg_samples:.2f}") + + red_flag_rate = self.stats["red_flagged"] / max(1, self.stats["total_samples"]) + logger.info(f"Red-flag rate: {red_flag_rate:.2%}") + logger.info("=" * 50) + + def estimate_cost( + self, total_steps: int, target_success_probability: float = 0.95 + ) -> Dict[str, Any]: + """ + Estimate the expected cost of solving a task with given steps. + + Uses the theoretical framework from the paper to estimate costs + based on step success rate and voting threshold. 
+ + Args: + total_steps: Total number of steps for the task. + target_success_probability: Target probability of solving the full task. + + Returns: + Dictionary containing cost estimates and statistics. + """ + # Estimate per-step success rate from current statistics + if self.stats["total_votes"] > 0: + valid_rate = self.stats["total_votes"] / max( + 1, self.stats["total_samples"] + ) + p = valid_rate * 0.99 # Assume 99% of valid votes are correct + else: + p = 0.99 # Default assumption + + # Calculate minimum k needed (Equation 14 from paper) + s = total_steps + t = target_success_probability + + if p > 0.5: + ratio = (1 - p) / p + try: + k_min = math.ceil(math.log(t ** (-1 / s) - 1) / math.log(ratio)) + except (ValueError, ZeroDivisionError): + k_min = 1 + else: + k_min = float("inf") + + # Expected samples per step (Equation 16 from paper) + if p > 0.5 and k_min != float("inf"): + expected_samples = k_min / (p * (2 * p - 1)) + else: + expected_samples = float("inf") + + return { + "estimated_p": p, + "estimated_k_min": k_min, + "expected_samples_per_step": expected_samples, + "expected_total_samples": ( + expected_samples * s + if expected_samples != float("inf") + else float("inf") + ), + "current_k": self.k, + "total_steps": s, + "target_success_probability": t, + } + + def get_statistics(self) -> Dict[str, Any]: + """ + Get execution statistics. + + Returns: + Dictionary containing execution statistics. + """ + return self.stats.copy() + + def reset(self): + """Reset the MAKER instance for a new run.""" + self.stats = { + "total_samples": 0, + "total_votes": 0, + "red_flagged": 0, + "steps_completed": 0, + "votes_per_step": [], + "samples_per_step": [], + } + self.conversation = Conversation(name=f"maker_{self.name}_{self.id}") + + +if __name__ == "__main__": + import re + + # Example: Using MAKER for a simple step-by-step task + print("MAKER: General-purpose example") + print("=" * 50) + + # Define task-specific functions for a counting task + def format_counting_prompt(task, state, step_idx, previous_result): + """Format prompt for counting task.""" + if previous_result is None: + return f"{task}\nThis is step 1. What is the first number? Reply with just the number." + return f"{task}\nThe previous number was {previous_result}. What is the next number? Reply with just the number." + + def parse_counting_response(response): + """Parse the counting response to extract the number.""" + numbers = re.findall(r"\d+", response) + if numbers: + return int(numbers[0]) + return response.strip() + + def validate_counting_response(response, max_tokens): + """Validate counting response.""" + if len(response) > max_tokens * 4: + return False + return bool(re.search(r"\d+", response)) + + # Create MAKER instance + maker = MAKER( + name="CountingExample", + description="MAKER example: counting numbers", + model_name="gpt-4o-mini", + system_prompt="You are a helpful assistant. 
When asked to count, respond with just the number, nothing else.", + format_prompt=format_counting_prompt, + parse_response=parse_counting_response, + validate_response=validate_counting_response, + k=2, + max_tokens=100, + temperature=0.1, + verbose=True, + ) + + print("\nRunning MAKER to count from 1 to 10...") + + # Run the solver with the task as the main input + try: + results = maker.run( + task="Count from 1 to 10, one number at a time", + max_steps=10, + ) + print(f"\nResults: {results}") + print("Expected: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]") + + # Show statistics + stats = maker.get_statistics() + print("\nStatistics:") + print(f" Steps completed: {stats['steps_completed']}") + print(f" Total samples: {stats['total_samples']}") + print(f" Red-flagged: {stats['red_flagged']}") + if stats["votes_per_step"]: + print( + f" Avg votes per step: {sum(stats['votes_per_step'])/len(stats['votes_per_step']):.2f}" + ) + except Exception as e: + print(f"Error: {e}") + print("(This example requires an API key to be configured)") diff --git a/swarms/utils/formatter.py b/swarms/utils/formatter.py index e3f0a73d..7e98b817 100644 --- a/swarms/utils/formatter.py +++ b/swarms/utils/formatter.py @@ -13,6 +13,7 @@ from rich.progress import ( from rich.table import Table from rich.text import Text from rich.spinner import Spinner +from rich.tree import Tree from rich.markdown import Markdown @@ -720,6 +721,85 @@ class Formatter: self.console.print() # Add blank line after stopping self._dashboard_live = None + def print_plan_tree( + self, + task_description: str, + steps: List[Dict[str, Any]], + print_on: bool = True, + ) -> None: + """ + Print the plan as a beautiful tree using Rich. + + Args: + task_description: Description of the main task + steps: List of step dictionaries with step_id, description, priority, and optional dependencies + print_on: Whether to print to console (True) or just log (False) + """ + import logging + + logger = logging.getLogger(__name__) + + # Create root tree + tree = Tree( + f"[bold cyan]📋 Plan: {task_description}[/bold cyan]" + ) + + # Priority color mapping + priority_colors = { + "critical": "red", + "high": "yellow", + "medium": "blue", + "low": "green", + } + + priority_icons = { + "critical": "🔴", + "high": "🟠", + "medium": "🟡", + "low": "🟢", + } + + # Create a mapping of step_id to tree nodes for dependency handling + step_nodes = {} + + # First pass: create all nodes + for step in steps: + step_id = step.get("step_id", "") + description = step.get("description", "") + priority = step.get("priority", "medium").lower() + dependencies = step.get("dependencies", []) + + priority_color = priority_colors.get(priority, "white") + priority_icon = priority_icons.get(priority, "○") + + # Create step label with priority indicator + step_label = ( + f"[{priority_color}]{priority_icon} {step_id}[/{priority_color}]: " + f"{description}" + ) + + # Add dependencies info if present + if dependencies: + deps_text = ", ".join(dependencies) + step_label += f" [dim](depends on: {deps_text})[/dim]" + + # Add node to tree + step_node = tree.add(step_label) + step_nodes[step_id] = step_node + + # Print the tree + if print_on: + self.console.print("\n") + self.console.print(tree) + self.console.print("") + else: + # Even if print_on is False, log the tree structure + logger.info(f"Plan created: {task_description}") + for step in steps: + logger.info( + f" - {step.get('step_id')} ({step.get('priority')}): {step.get('description')}" + ) + # Global formatter instance with markdown output 
enabled by default formatter = Formatter(md=False) diff --git a/tests/structs/test_custom_agent.py b/tests/structs/test_custom_agent.py index 63969b97..f04c845a 100644 --- a/tests/structs/test_custom_agent.py +++ b/tests/structs/test_custom_agent.py @@ -6,7 +6,6 @@ from swarms.structs.custom_agent import CustomAgent, AgentResponse try: import pytest_asyncio - ASYNC_AVAILABLE = True except ImportError: ASYNC_AVAILABLE = False @@ -41,10 +40,7 @@ def test_custom_agent_initialization(): timeout=30.0, verify_ssl=True, ) - assert ( - custom_agent_instance.base_url - == "https://api.example.com" - ) + assert custom_agent_instance.base_url == "https://api.example.com" assert custom_agent_instance.endpoint == "v1/endpoint" assert custom_agent_instance.timeout == 30.0 assert custom_agent_instance.verify_ssl is True @@ -55,9 +51,7 @@ def test_custom_agent_initialization(): raise -def test_custom_agent_initialization_with_default_headers( - sample_custom_agent, -): +def test_custom_agent_initialization_with_default_headers(sample_custom_agent): try: custom_agent_no_headers = CustomAgent( name="TestAgent", @@ -65,9 +59,7 @@ def test_custom_agent_initialization_with_default_headers( base_url="https://api.test.com", endpoint="test", ) - assert ( - "Content-Type" in custom_agent_no_headers.default_headers - ) + assert "Content-Type" in custom_agent_no_headers.default_headers assert ( custom_agent_no_headers.default_headers["Content-Type"] == "application/json" @@ -86,10 +78,7 @@ def test_custom_agent_url_normalization(): base_url="https://api.test.com/", endpoint="/v1/test", ) - assert ( - custom_agent_with_slashes.base_url - == "https://api.test.com" - ) + assert custom_agent_with_slashes.base_url == "https://api.test.com" assert custom_agent_with_slashes.endpoint == "v1/test" logger.debug("URL normalization works correctly") except Exception as e: @@ -101,22 +90,14 @@ def test_prepare_headers(sample_custom_agent): try: prepared_headers = sample_custom_agent._prepare_headers() assert "Authorization" in prepared_headers - assert ( - prepared_headers["Authorization"] == "Bearer test-token" - ) + assert prepared_headers["Authorization"] == "Bearer test-token" additional_headers = {"X-Custom-Header": "custom-value"} prepared_headers_with_additional = ( sample_custom_agent._prepare_headers(additional_headers) ) - assert ( - prepared_headers_with_additional["X-Custom-Header"] - == "custom-value" - ) - assert ( - prepared_headers_with_additional["Authorization"] - == "Bearer test-token" - ) + assert prepared_headers_with_additional["X-Custom-Header"] == "custom-value" + assert prepared_headers_with_additional["Authorization"] == "Bearer test-token" logger.debug("Header preparation works correctly") except Exception as e: logger.error(f"Failed to test prepare_headers: {e}") @@ -126,9 +107,7 @@ def test_prepare_headers(sample_custom_agent): def test_prepare_payload_dict(sample_custom_agent): try: payload_dict = {"key": "value", "number": 123} - prepared_payload = sample_custom_agent._prepare_payload( - payload_dict - ) + prepared_payload = sample_custom_agent._prepare_payload(payload_dict) assert isinstance(prepared_payload, str) parsed = json.loads(prepared_payload) assert parsed["key"] == "value" @@ -142,30 +121,22 @@ def test_prepare_payload_dict(sample_custom_agent): def test_prepare_payload_string(sample_custom_agent): try: payload_string = '{"test": "value"}' - prepared_payload = sample_custom_agent._prepare_payload( - payload_string - ) + prepared_payload = 
sample_custom_agent._prepare_payload(payload_string) assert prepared_payload == payload_string logger.debug("String payload prepared correctly") except Exception as e: - logger.error( - f"Failed to test prepare_payload with string: {e}" - ) + logger.error(f"Failed to test prepare_payload with string: {e}") raise def test_prepare_payload_bytes(sample_custom_agent): try: payload_bytes = b'{"test": "value"}' - prepared_payload = sample_custom_agent._prepare_payload( - payload_bytes - ) + prepared_payload = sample_custom_agent._prepare_payload(payload_bytes) assert prepared_payload == payload_bytes logger.debug("Bytes payload prepared correctly") except Exception as e: - logger.error( - f"Failed to test prepare_payload with bytes: {e}" - ) + logger.error(f"Failed to test prepare_payload with bytes: {e}") raise @@ -177,9 +148,7 @@ def test_parse_response_success(sample_custom_agent): mock_response.headers = {"content-type": "application/json"} mock_response.json.return_value = {"message": "success"} - parsed_response = sample_custom_agent._parse_response( - mock_response - ) + parsed_response = sample_custom_agent._parse_response(mock_response) assert isinstance(parsed_response, AgentResponse) assert parsed_response.status_code == 200 assert parsed_response.success is True @@ -198,9 +167,7 @@ def test_parse_response_error(sample_custom_agent): mock_response.text = "Not Found" mock_response.headers = {"content-type": "text/plain"} - parsed_response = sample_custom_agent._parse_response( - mock_response - ) + parsed_response = sample_custom_agent._parse_response(mock_response) assert isinstance(parsed_response, AgentResponse) assert parsed_response.status_code == 404 assert parsed_response.success is False @@ -222,15 +189,11 @@ def test_extract_content_openai_format(sample_custom_agent): } ] } - extracted_content = sample_custom_agent._extract_content( - openai_response - ) + extracted_content = sample_custom_agent._extract_content(openai_response) assert extracted_content == "This is the response content" logger.debug("OpenAI format content extracted correctly") except Exception as e: - logger.error( - f"Failed to test extract_content OpenAI format: {e}" - ) + logger.error(f"Failed to test extract_content OpenAI format: {e}") raise @@ -239,33 +202,25 @@ def test_extract_content_anthropic_format(sample_custom_agent): anthropic_response = { "content": [ {"text": "First part "}, - {"text": "second part"}, + {"text": "second part"} ] } - extracted_content = sample_custom_agent._extract_content( - anthropic_response - ) + extracted_content = sample_custom_agent._extract_content(anthropic_response) assert extracted_content == "First part second part" logger.debug("Anthropic format content extracted correctly") except Exception as e: - logger.error( - f"Failed to test extract_content Anthropic format: {e}" - ) + logger.error(f"Failed to test extract_content Anthropic format: {e}") raise def test_extract_content_generic_format(sample_custom_agent): try: generic_response = {"text": "Generic response text"} - extracted_content = sample_custom_agent._extract_content( - generic_response - ) + extracted_content = sample_custom_agent._extract_content(generic_response) assert extracted_content == "Generic response text" logger.debug("Generic format content extracted correctly") except Exception as e: - logger.error( - f"Failed to test extract_content generic format: {e}" - ) + logger.error(f"Failed to test extract_content generic format: {e}") raise @@ -274,18 +229,14 @@ def test_run_success(mock_client_class, 
sample_custom_agent): try: mock_response = Mock() mock_response.status_code = 200 - mock_response.text = ( - '{"choices": [{"message": {"content": "Success"}}]}' - ) + mock_response.text = '{"choices": [{"message": {"content": "Success"}}]}' mock_response.json.return_value = { "choices": [{"message": {"content": "Success"}}] } mock_response.headers = {"content-type": "application/json"} mock_client_instance = Mock() - mock_client_instance.__enter__ = Mock( - return_value=mock_client_instance - ) + mock_client_instance.__enter__ = Mock(return_value=mock_client_instance) mock_client_instance.__exit__ = Mock(return_value=None) mock_client_instance.post.return_value = mock_response mock_client_class.return_value = mock_client_instance @@ -308,9 +259,7 @@ def test_run_error_response(mock_client_class, sample_custom_agent): mock_response.text = "Internal Server Error" mock_client_instance = Mock() - mock_client_instance.__enter__ = Mock( - return_value=mock_client_instance - ) + mock_client_instance.__enter__ = Mock(return_value=mock_client_instance) mock_client_instance.__exit__ = Mock(return_value=None) mock_client_instance.post.return_value = mock_response mock_client_class.return_value = mock_client_instance @@ -331,13 +280,9 @@ def test_run_request_error(mock_client_class, sample_custom_agent): import httpx mock_client_instance = Mock() - mock_client_instance.__enter__ = Mock( - return_value=mock_client_instance - ) + mock_client_instance.__enter__ = Mock(return_value=mock_client_instance) mock_client_instance.__exit__ = Mock(return_value=None) - mock_client_instance.post.side_effect = httpx.RequestError( - "Connection failed" - ) + mock_client_instance.post.side_effect = httpx.RequestError("Connection failed") mock_client_class.return_value = mock_client_instance test_payload = {"message": "test"} @@ -350,33 +295,23 @@ def test_run_request_error(mock_client_class, sample_custom_agent): raise -@pytest.mark.skipif( - not ASYNC_AVAILABLE, reason="pytest-asyncio not installed" -) +@pytest.mark.skipif(not ASYNC_AVAILABLE, reason="pytest-asyncio not installed") @pytest.mark.asyncio @patch("swarms.structs.custom_agent.httpx.AsyncClient") -async def test_run_async_success( - mock_async_client_class, sample_custom_agent -): +async def test_run_async_success(mock_async_client_class, sample_custom_agent): try: mock_response = Mock() mock_response.status_code = 200 - mock_response.text = ( - '{"content": [{"text": "Async Success"}]}' - ) + mock_response.text = '{"content": [{"text": "Async Success"}]}' mock_response.json.return_value = { "content": [{"text": "Async Success"}] } mock_response.headers = {"content-type": "application/json"} mock_client_instance = AsyncMock() - mock_client_instance.__aenter__ = AsyncMock( - return_value=mock_client_instance - ) + mock_client_instance.__aenter__ = AsyncMock(return_value=mock_client_instance) mock_client_instance.__aexit__ = AsyncMock(return_value=None) - mock_client_instance.post = AsyncMock( - return_value=mock_response - ) + mock_client_instance.post = AsyncMock(return_value=mock_response) mock_async_client_class.return_value = mock_client_instance test_payload = {"message": "test"} @@ -389,27 +324,19 @@ async def test_run_async_success( raise -@pytest.mark.skipif( - not ASYNC_AVAILABLE, reason="pytest-asyncio not installed" -) +@pytest.mark.skipif(not ASYNC_AVAILABLE, reason="pytest-asyncio not installed") @pytest.mark.asyncio @patch("swarms.structs.custom_agent.httpx.AsyncClient") -async def test_run_async_error_response( - mock_async_client_class, 
sample_custom_agent -): +async def test_run_async_error_response(mock_async_client_class, sample_custom_agent): try: mock_response = Mock() mock_response.status_code = 400 mock_response.text = "Bad Request" mock_client_instance = AsyncMock() - mock_client_instance.__aenter__ = AsyncMock( - return_value=mock_client_instance - ) + mock_client_instance.__aenter__ = AsyncMock(return_value=mock_client_instance) mock_client_instance.__aexit__ = AsyncMock(return_value=None) - mock_client_instance.post = AsyncMock( - return_value=mock_response - ) + mock_client_instance.post = AsyncMock(return_value=mock_response) mock_async_client_class.return_value = mock_client_instance test_payload = {"message": "test"} @@ -439,4 +366,4 @@ def test_agent_response_dataclass(): logger.debug("AgentResponse dataclass created correctly") except Exception as e: logger.error(f"Failed to test AgentResponse dataclass: {e}") - raise + raise \ No newline at end of file