Merge pull request #787 from harshalmore31/octotools
Implemented OctoTools Framework with Swarms (pull/790/head)
commit ddb4119c04
@@ -0,0 +1,725 @@

"""
OctoToolsSwarm: A multi-agent system for complex reasoning.
Implements the OctoTools framework using swarms.
"""

import json
import logging
import math
import os
import re
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

from dotenv import load_dotenv

from swarms import Agent
from swarms.structs.conversation import Conversation

# from exa_search import exa_search as web_search_execute

# Load environment variables
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ToolType(Enum):
    """Defines the types of tools available."""

    IMAGE_CAPTIONER = "image_captioner"
    OBJECT_DETECTOR = "object_detector"
    WEB_SEARCH = "web_search"
    PYTHON_CALCULATOR = "python_calculator"
    # Add more tool types as needed


@dataclass
class Tool:
    """
    Represents an external tool.

    Attributes:
        name: Unique name of the tool.
        description: Description of the tool's function.
        metadata: Dictionary containing tool metadata.
        execute_func: Callable function that executes the tool's logic.
    """

    name: str
    description: str
    metadata: Dict[str, Any]
    execute_func: Callable

    def execute(self, **kwargs):
        """Executes the tool's logic, handling potential errors."""
        try:
            return self.execute_func(**kwargs)
        except Exception as e:
            logger.error(f"Error executing tool {self.name}: {str(e)}")
            return {"error": str(e)}


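# A minimal usage sketch (the Echo_Tool below is hypothetical, for
# illustration only): any callable can be wrapped as a Tool, and
# Tool.execute surfaces exceptions as an {"error": ...} dict instead of
# raising.
#
# echo_tool = Tool(
#     name="Echo_Tool",
#     description="Returns its input unchanged.",
#     metadata={"input_types": {"text": "str"}, "output_type": "str"},
#     execute_func=lambda text: text,
# )
# echo_tool.execute(text="hello")   # -> "hello"
# echo_tool.execute(missing="arg")  # -> {"error": "..."} (TypeError captured)

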
class AgentRole(Enum):
    """Defines the roles for agents in the OctoTools system."""

    PLANNER = "planner"
    VERIFIER = "verifier"
    SUMMARIZER = "summarizer"


class OctoToolsSwarm:
    """
    A multi-agent system implementing the OctoTools framework.

    Attributes:
        model_name: Name of the LLM model to use.
        max_iterations: Maximum number of action-execution iterations.
        base_path: Path for saving agent states.
        tools: Dictionary of available Tool objects, keyed by tool name.
    """

    def __init__(
        self,
        tools: List[Tool],
        model_name: str = "gemini/gemini-2.0-flash",
        max_iterations: int = 10,
        base_path: Optional[str] = None,
    ):
        """Initialize the OctoToolsSwarm system."""
        self.model_name = model_name
        self.max_iterations = max_iterations
        self.base_path = Path(base_path) if base_path else Path("./octotools_states")
        self.base_path.mkdir(exist_ok=True)
        self.tools = {tool.name: tool for tool in tools}  # Store tools in a dictionary

        # Initialize agents
        self._init_agents()

        # Create conversation tracker and memory
        self.conversation = Conversation()
        self.memory = []  # Store the trajectory

    def _init_agents(self) -> None:
        """Initialize all agents with their specific roles and prompts."""
        # Planner agent
        self.planner = Agent(
            agent_name="OctoTools-Planner",
            system_prompt=self._get_planner_prompt(),
            model_name=self.model_name,
            max_loops=3,
            saved_state_path=str(self.base_path / "planner.json"),
            verbose=True,
        )

        # Verifier agent
        self.verifier = Agent(
            agent_name="OctoTools-Verifier",
            system_prompt=self._get_verifier_prompt(),
            model_name=self.model_name,
            max_loops=1,
            saved_state_path=str(self.base_path / "verifier.json"),
            verbose=True,
        )

        # Summarizer agent
        self.summarizer = Agent(
            agent_name="OctoTools-Summarizer",
            system_prompt=self._get_summarizer_prompt(),
            model_name=self.model_name,
            max_loops=1,
            saved_state_path=str(self.base_path / "summarizer.json"),
            verbose=True,
        )

    def _get_planner_prompt(self) -> str:
        """Get the prompt for the planner agent (improved with few-shot examples)."""
        tool_descriptions = "\n".join(
            [
                f"- {tool_name}: {self.tools[tool_name].description}"
                for tool_name in self.tools
            ]
        )
        return f"""You are the Planner in the OctoTools framework. Your role is to analyze the user's query,
identify required skills, suggest relevant tools, and plan the steps to solve the problem.

1. **Analyze the user's query:** Understand the requirements and identify the necessary skills and potentially relevant tools.
2. **Perform high-level planning:** Create a rough outline of how tools might be used to solve the problem.
3. **Perform low-level planning (action prediction):** At each step, select the best tool to use and formulate a specific sub-goal for that tool, considering the current context.

Available Tools:
{tool_descriptions}

Output your response in JSON format. Here are examples for different stages:

**Query Analysis (High-Level Planning):**
Example Input:
Query: "What is the capital of France?"

Example Output:
```json
{{
    "summary": "The user is asking for the capital of France.",
    "required_skills": ["knowledge retrieval"],
    "relevant_tools": ["Web_Search_Tool"]
}}
```

**Action Prediction (Low-Level Planning):**
Example Input:
Context: {{ "query": "What is the capital of France?", "available_tools": ["Web_Search_Tool"] }}

Example Output:
```json
{{
    "justification": "The Web_Search_Tool can be used to directly find the capital of France.",
    "context": {{}},
    "sub_goal": "Search the web for 'capital of France'.",
    "tool_name": "Web_Search_Tool"
}}
```
Another Example:
Context: {{"query": "How many objects are in the image?", "available_tools": ["Image_Captioner_Tool", "Object_Detector_Tool"], "image": "objects.png"}}

Example Output:
```json
{{
    "justification": "First, get a general description of the image to understand the context.",
    "context": {{ "image": "objects.png" }},
    "sub_goal": "Generate a description of the image.",
    "tool_name": "Image_Captioner_Tool"
}}
```

Example for Finding Square Root:
Context: {{"query": "What is the square root of the number of objects in the image?", "available_tools": ["Object_Detector_Tool", "Python_Calculator_Tool"], "image": "objects.png", "Object_Detector_Tool_result": ["object1", "object2", "object3", "object4"]}}

Example Output:
```json
{{
    "justification": "We have detected 4 objects in the image. Now we need to find the square root of 4.",
    "context": {{}},
    "sub_goal": "Calculate the square root of 4",
    "tool_name": "Python_Calculator_Tool"
}}
```

Your output MUST be a single, valid JSON object with the following keys:
- justification (string): Your reasoning.
- context (dict): A dictionary containing relevant information.
- sub_goal (string): The specific instruction for the tool.
- tool_name (string): The EXACT name of the tool to use.

Do NOT include any text outside of the JSON object.
"""

    def _get_verifier_prompt(self) -> str:
        """Get the prompt for the verifier agent (improved with few-shot examples)."""
        return """You are the Context Verifier in the OctoTools framework. Your role is to analyze the current context
and memory to determine if the problem is solved, if there are any inconsistencies, or if further steps are needed.

Output your response in JSON format:

Expected output structure:
```json
{
    "completeness": "Indicate whether the query is fully, partially, or not answered.",
    "inconsistencies": "List any inconsistencies found in the context or memory.",
    "verification_needs": "List any information that needs further verification.",
    "ambiguities": "List any ambiguities found in the context or memory.",
    "stop_signal": true/false
}
```

Example Input:
Context: { "last_result": { "result": "Caption: The image shows a cat." } }
Memory: [ { "component": "Action Predictor", "result": { "tool_name": "Image_Captioner_Tool" } } ]

Example Output:
```json
{
    "completeness": "partial",
    "inconsistencies": [],
    "verification_needs": ["Object detection to confirm the presence of a cat."],
    "ambiguities": [],
    "stop_signal": false
}
```

Another Example:
Context: { "last_result": { "result": ["Detected object: cat"] } }
Memory: [ { "component": "Action Predictor", "result": { "tool_name": "Object_Detector_Tool" } } ]

Example Output:
```json
{
    "completeness": "yes",
    "inconsistencies": [],
    "verification_needs": [],
    "ambiguities": [],
    "stop_signal": true
}
```

Square Root Example:
Context: {
    "query": "What is the square root of the number of objects in the image?",
    "image": "example.png",
    "Object_Detector_Tool_result": ["object1", "object2", "object3", "object4"],
    "Python_Calculator_Tool_result": "Result of 4**0.5 is 2.0"
}
Memory: [
    { "component": "Action Predictor", "result": { "tool_name": "Object_Detector_Tool" } },
    { "component": "Action Predictor", "result": { "tool_name": "Python_Calculator_Tool" } }
]

Example Output:
```json
{
    "completeness": "yes",
    "inconsistencies": [],
    "verification_needs": [],
    "ambiguities": [],
    "stop_signal": true
}
```
"""

    def _get_summarizer_prompt(self) -> str:
        """Get the prompt for the summarizer agent (improved with few-shot examples)."""
        return """You are the Solution Summarizer in the OctoTools framework. Your role is to synthesize the final
answer to the user's query based on the complete trajectory of actions and results.

Output your response in JSON format:

Expected output structure:
```json
{
    "final_answer": "Provide a clear and concise answer to the original query."
}
```
Example Input:
Memory: [
    {"component": "Query Analyzer", "result": {"summary": "Find the capital of France."}},
    {"component": "Action Predictor", "result": {"tool_name": "Web_Search_Tool"}},
    {"component": "Tool Execution", "result": {"result": "The capital of France is Paris."}}
]

Example Output:
```json
{
    "final_answer": "The capital of France is Paris."
}
```

Square Root Example:
Memory: [
    {"component": "Query Analyzer", "result": {"summary": "Find the square root of the number of objects in the image."}},
    {"component": "Action Predictor", "result": {"tool_name": "Object_Detector_Tool", "sub_goal": "Detect objects in the image"}},
    {"component": "Tool Execution", "result": {"result": ["object1", "object2", "object3", "object4"]}},
    {"component": "Action Predictor", "result": {"tool_name": "Python_Calculator_Tool", "sub_goal": "Calculate the square root of 4"}},
    {"component": "Tool Execution", "result": {"result": "Result of 4**0.5 is 2.0"}}
]

Example Output:
```json
{
    "final_answer": "The square root of the number of objects in the image is 2.0. There are 4 objects in the image, and the square root of 4 is 2.0."
}
```
"""

    def _safely_parse_json(self, json_str: str) -> Dict[str, Any]:
        """Safely parse JSON, falling back to brace matching to extract an object."""
        try:
            return json.loads(json_str)
        except json.JSONDecodeError:
            logger.warning(f"JSONDecodeError: Attempting to extract JSON from: {json_str}")
            try:
                # More robust JSON extraction: scan for the first balanced brace span
                def extract_json(s):
                    """Return the first balanced {...} block in s, or None."""
                    stack = []
                    start = -1
                    for i, c in enumerate(s):
                        if c == "{":
                            if not stack:
                                start = i
                            stack.append(c)
                        elif c == "}":
                            if stack:
                                stack.pop()
                                if not stack and start != -1:
                                    return s[start : i + 1]
                    return None

                extracted_json = extract_json(json_str)
                if extracted_json:
                    logger.info(f"Extracted JSON: {extracted_json}")
                    return json.loads(extracted_json)
                else:
                    logger.error("Failed to extract JSON via brace matching.")
                    return {"error": "Failed to parse JSON", "content": json_str}
            except Exception as e:
                logger.exception(f"Error during JSON extraction: {e}")
                return {"error": "Failed to parse JSON", "content": json_str}

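    # A minimal sketch of the fallback extraction (the `swarm` instance and
    # inputs below are hypothetical): LLMs often wrap JSON in prose, so the
    # brace scanner recovers the first balanced object and re-parses it.
    #
    # swarm._safely_parse_json('Here you go: {"a": {"b": 1}} Thanks!')
    # # -> {"a": {"b": 1}}
    # swarm._safely_parse_json("no json here")
    # # -> {"error": "Failed to parse JSON", "content": "no json here"}
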
    def _execute_tool(self, tool_name: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Executes a tool based on its name and provided context."""
        if tool_name not in self.tools:
            return {"error": f"Tool '{tool_name}' not found."}

        tool = self.tools[tool_name]
        try:
            # For the Python Calculator tool, derive an expression from
            # Object Detector results when one is not already supplied
            if tool_name == "Python_Calculator_Tool":
                # Check for object detector results
                object_detector_result = context.get("Object_Detector_Tool_result")
                if object_detector_result and isinstance(object_detector_result, list):
                    # Calculate the number of objects
                    num_objects = len(object_detector_result)
                    if "sub_goal" in context and "Calculate the square root" in context["sub_goal"]:
                        context["expression"] = f"{num_objects}**0.5"
                    elif "expression" not in context:
                        # Default to the square root if no expression is specified
                        context["expression"] = f"{num_objects}**0.5"

            # Filter context: only pass inputs the tool declares in its metadata
            valid_inputs = {
                k: v for k, v in context.items() if k in tool.metadata.get("input_types", {})
            }
            result = tool.execute(**valid_inputs)
            return {"result": result}
        except Exception as e:
            logger.exception(f"Error executing tool {tool_name}: {e}")
            return {"error": str(e)}

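    # Sketch of the context filtering above (values are illustrative): only
    # keys declared in the tool's metadata["input_types"] reach execute_func,
    # so loop bookkeeping like "sub_goal" or "query" never leaks into a tool
    # that does not declare it.
    #
    # context = {"query": "...", "sub_goal": "...", "expression": "4**0.5"}
    # input_types = {"expression": "str"}   # Python_Calculator_Tool metadata
    # valid_inputs = {k: v for k, v in context.items() if k in input_types}
    # # valid_inputs == {"expression": "4**0.5"}
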
    def _run_agent(self, agent: Agent, input_prompt: str) -> Dict[str, Any]:
        """Runs a swarms agent and parses its output as JSON."""
        try:
            # Construct the full input, including the system prompt
            full_input = f"{agent.system_prompt}\n\n{input_prompt}"

            # Run the agent and capture the output
            agent_response = agent.run(full_input)

            logger.info(f"DEBUG: Raw agent response: {agent_response}")

            # Extract the LLM's response (assuming the agent returns it directly)
            response_text = agent_response

            # Try to parse the response as JSON
            parsed_response = self._safely_parse_json(response_text)

            return parsed_response

        except Exception as e:
            logger.exception(f"Error running agent {agent.agent_name}: {e}")
            return {"error": f"Agent {agent.agent_name} failed: {str(e)}"}

    def run(self, query: str, image: Optional[str] = None) -> Dict[str, Any]:
        """Execute the task through the multi-agent workflow."""
        logger.info(f"Starting task: {query}")

        try:
            # Step 1: Query Analysis (High-Level Planning)
            planner_input = (
                "Analyze the following query and determine the necessary skills and"
                f" relevant tools: {query}"
            )
            query_analysis = self._run_agent(self.planner, planner_input)

            if "error" in query_analysis:
                return {
                    "error": f"Planner query analysis failed: {query_analysis['error']}",
                    "trajectory": self.memory,
                    "conversation": self.conversation.return_history_as_string(),
                }

            self.memory.append(
                {"step": 0, "component": "Query Analyzer", "result": query_analysis}
            )
            self.conversation.add(
                role=self.planner.agent_name, content=json.dumps(query_analysis)
            )

            # Initialize context with the query and image (if provided)
            context = {"query": query}
            if image:
                context["image"] = image

            # Add available tools to context
            if "relevant_tools" in query_analysis:
                context["available_tools"] = query_analysis["relevant_tools"]
            else:
                # If no relevant tools are specified, make all tools available
                context["available_tools"] = list(self.tools.keys())

            step_count = 1

            # Step 2: Iterative Action-Execution Loop
            while step_count <= self.max_iterations:
                logger.info(f"Starting iteration {step_count} of {self.max_iterations}")

                # Step 2a: Action Prediction (Low-Level Planning)
                action_planner_input = (
                    f"Current Context: {json.dumps(context)}\nAvailable Tools:"
                    f" {', '.join(context.get('available_tools', list(self.tools.keys())))}\nPlan the"
                    " next step."
                )
                action = self._run_agent(self.planner, action_planner_input)
                if "error" in action:
                    logger.error(f"Error in action prediction: {action['error']}")
                    return {
                        "error": f"Planner action prediction failed: {action['error']}",
                        "trajectory": self.memory,
                        "conversation": self.conversation.return_history_as_string(),
                    }
                self.memory.append(
                    {"step": step_count, "component": "Action Predictor", "result": action}
                )
                self.conversation.add(role=self.planner.agent_name, content=json.dumps(action))

                # Input validation for the action (relaxed)
                if not isinstance(action, dict) or "tool_name" not in action or "sub_goal" not in action:
                    error_msg = (
                        "Action prediction did not return the required fields (tool_name,"
                        " sub_goal) or was not a dictionary."
                    )
                    logger.error(error_msg)
                    self.memory.append(
                        {"step": step_count, "component": "Error", "result": error_msg}
                    )
                    break

                # Step 2b: Execute Tool
                tool_execution_context = {
                    **context,
                    **action.get("context", {}),  # Add any additional context
                    "sub_goal": action["sub_goal"],  # Pass sub_goal to the tool
                }

                tool_result = self._execute_tool(action["tool_name"], tool_execution_context)

                self.memory.append(
                    {
                        "step": step_count,
                        "component": "Tool Execution",
                        "result": tool_result,
                    }
                )

                # Step 2c: Context Update - store the result under a descriptive key
                if "result" in tool_result:
                    context[f"{action['tool_name']}_result"] = tool_result["result"]
                if "error" in tool_result:
                    context[f"{action['tool_name']}_error"] = tool_result["error"]

                # Step 2d: Context Verification
                verifier_input = (
                    f"Current Context: {json.dumps(context)}\nMemory:"
                    f" {json.dumps(self.memory)}\nQuery: {query}"
                )
                verification = self._run_agent(self.verifier, verifier_input)
                if "error" in verification:
                    return {
                        "error": f"Verifier failed: {verification['error']}",
                        "trajectory": self.memory,
                        "conversation": self.conversation.return_history_as_string(),
                    }

                self.memory.append(
                    {
                        "step": step_count,
                        "component": "Context Verifier",
                        "result": verification,
                    }
                )
                self.conversation.add(role=self.verifier.agent_name, content=json.dumps(verification))

                # Check for a stop signal from the Verifier
                if verification.get("stop_signal") is True:
                    logger.info("Received stop signal from verifier. Stopping iterations.")
                    break

                # Safety mechanism - stop if the same tool keeps being selected
                same_tool_count = sum(
                    1 for m in self.memory
                    if m.get("component") == "Action Predictor"
                    and m.get("result", {}).get("tool_name") == action.get("tool_name")
                )

                if same_tool_count > 3:
                    logger.warning(f"Tool {action.get('tool_name')} used more than 3 times. Forcing stop.")
                    break

                step_count += 1

            # Step 3: Solution Summarization
            summarizer_input = f"Complete Trajectory: {json.dumps(self.memory)}\nOriginal Query: {query}"

            summarization = self._run_agent(self.summarizer, summarizer_input)
            if "error" in summarization:
                return {
                    "error": f"Summarizer failed: {summarization['error']}",
                    "trajectory": self.memory,
                    "conversation": self.conversation.return_history_as_string(),
                }
            self.conversation.add(role=self.summarizer.agent_name, content=json.dumps(summarization))

            return {
                "final_answer": summarization.get("final_answer", "No answer found."),
                "trajectory": self.memory,
                "conversation": self.conversation.return_history_as_string(),
            }

        except Exception as e:
            logger.exception(f"Unexpected error in run method: {e}")
            return {
                "error": str(e),
                "trajectory": self.memory,
                "conversation": self.conversation.return_history_as_string(),
            }

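    # Shape of the trajectory collected by run() (values are illustrative):
    # each loop iteration appends an Action Predictor, Tool Execution, and
    # Context Verifier entry, which the summarizer then consumes as a whole.
    #
    # [
    #     {"step": 0, "component": "Query Analyzer", "result": {...}},
    #     {"step": 1, "component": "Action Predictor", "result": {"tool_name": ..., "sub_goal": ...}},
    #     {"step": 1, "component": "Tool Execution", "result": {"result": ...}},
    #     {"step": 1, "component": "Context Verifier", "result": {"stop_signal": ...}},
    # ]
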
    def save_state(self) -> None:
        """Save the current state of all agents."""
        for agent in [self.planner, self.verifier, self.summarizer]:
            try:
                agent.save_state()
            except Exception as e:
                logger.error(f"Error saving state for {agent.agent_name}: {str(e)}")

    def load_state(self) -> None:
        """Load the saved state of all agents."""
        for agent in [self.planner, self.verifier, self.summarizer]:
            try:
                agent.load_state()
            except Exception as e:
                logger.error(f"Error loading state for {agent.agent_name}: {str(e)}")


# --- Example Usage ---


# Define dummy tool functions (replace with actual implementations)
def image_captioner_execute(image: str, prompt: str = "Describe the image", **kwargs) -> str:
    """Dummy image captioner."""
    print(f"image_captioner_execute called with image: {image}, prompt: {prompt}")
    return f"Caption for {image}: A descriptive caption (dummy)."


def object_detector_execute(image: str, labels: Optional[List[str]] = None, **kwargs) -> List[str]:
    """Dummy object detector; handles missing labels gracefully."""
    print(f"object_detector_execute called with image: {image}, labels: {labels}")
    if not labels:
        # Return default objects if no labels are given
        return ["object1", "object2", "object3", "object4"]
    return [f"Detected {label}" for label in labels]


def web_search_execute(query: str, **kwargs) -> str:
    """Dummy web search."""
    print(f"web_search_execute called with query: {query}")
    return f"Search results for '{query}'..."


def python_calculator_execute(expression: str, **kwargs) -> str:
    """Python calculator for basic arithmetic expressions."""
    print(f"python_calculator_execute called with: {expression}")
    try:
        # Safely evaluate only simple expressions built from numbers and basic
        # operators; the whitelist rejects names, so no builtins are reachable
        # ("math" is exposed for future use but blocked by the pattern).
        if re.match(r"^[0-9+\-*/().\s]+$", expression):
            result = eval(expression, {"__builtins__": {}, "math": math})
            return f"Result of {expression} is {result}"
        else:
            return "Error: Invalid expression for calculator."
    except Exception as e:
        return f"Error: {e}"


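# Illustrative behavior of the expression whitelist above (expected outputs
# shown as comments; "math.sqrt(4)" is rejected because letters are not in
# the pattern):
#
# python_calculator_execute("4**0.5")        # -> "Result of 4**0.5 is 2.0"
# python_calculator_execute("(3 + 5) / 2")   # -> "Result of (3 + 5) / 2 is 4.0"
# python_calculator_execute("math.sqrt(4)")  # -> "Error: Invalid expression for calculator."

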
# Create utility function to get default tools
def get_default_tools() -> List[Tool]:
    """Returns a list of default tools that can be used with OctoToolsSwarm."""
    image_captioner = Tool(
        name="Image_Captioner_Tool",
        description="Generates a caption for an image.",
        metadata={
            "input_types": {"image": "str", "prompt": "str"},
            "output_type": "str",
            "limitations": "May struggle with complex scenes or ambiguous objects.",
            "best_practices": "Use with clear, well-lit images. Provide specific prompts for better results.",
        },
        execute_func=image_captioner_execute,
    )

    object_detector = Tool(
        name="Object_Detector_Tool",
        description="Detects objects in an image.",
        metadata={
            "input_types": {"image": "str", "labels": "list"},
            "output_type": "list",
            "limitations": "Accuracy depends on the quality of the image and the clarity of the objects.",
            "best_practices": "Provide a list of specific object labels to detect. Use high-resolution images.",
        },
        execute_func=object_detector_execute,
    )

    web_search = Tool(
        name="Web_Search_Tool",
        description="Performs a web search.",
        metadata={
            "input_types": {"query": "str"},
            "output_type": "str",
            "limitations": "May not find specific or niche information.",
            "best_practices": "Use specific and descriptive keywords for better results.",
        },
        execute_func=web_search_execute,
    )

    calculator = Tool(
        name="Python_Calculator_Tool",
        description="Evaluates a Python expression.",
        metadata={
            "input_types": {"expression": "str"},
            "output_type": "str",
            "limitations": "Cannot handle complex mathematical functions or libraries.",
            "best_practices": "Use for basic arithmetic and simple calculations.",
        },
        execute_func=python_calculator_execute,
    )

    return [image_captioner, object_detector, web_search, calculator]


# Only execute the example when this script is run directly
# if __name__ == "__main__":
#     print("Running OctoToolsSwarm example...")

#     # Create an OctoToolsSwarm agent with default tools
#     tools = get_default_tools()
#     agent = OctoToolsSwarm(tools=tools)

#     # Example query
#     query = "What is the square root of the number of objects in this image?"

#     # Create a dummy image file for testing if it doesn't exist
#     image_path = "example.png"
#     if not os.path.exists(image_path):
#         with open(image_path, "w") as f:
#             f.write("Dummy image content")
#         print(f"Created dummy image file: {image_path}")

#     # Run the agent
#     result = agent.run(query, image=image_path)

#     # Display results
#     print("\n=== FINAL ANSWER ===")
#     print(result["final_answer"])

#     print("\n=== TRAJECTORY SUMMARY ===")
#     for step in result["trajectory"]:
#         print(f"Step {step.get('step', 'N/A')}: {step.get('component', 'Unknown')}")

#     print("\nOctoToolsSwarm example completed.")

@@ -1,580 +0,0 @@

"""
|
|
||||||
TalkHier: A hierarchical multi-agent framework for content generation and refinement.
|
|
||||||
Implements structured communication and evaluation protocols.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from datetime import datetime
|
|
||||||
from enum import Enum
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional, Union
|
|
||||||
|
|
||||||
from swarms import Agent
|
|
||||||
from swarms.structs.conversation import Conversation
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class AgentRole(Enum):
    """Defines the possible roles for agents in the system."""

    SUPERVISOR = "supervisor"
    GENERATOR = "generator"
    EVALUATOR = "evaluator"
    REVISOR = "revisor"


@dataclass
class CommunicationEvent:
    """Represents a structured communication event between agents."""

    message: str
    background: Optional[str] = None
    intermediate_output: Optional[Dict[str, Any]] = None


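# A usage sketch (values are hypothetical): CommunicationEvent packages a
# message with optional background and intermediate output so inter-agent
# messages carry structure rather than bare strings. Note it is not yet
# wired into the TalkHier flow below.
#
# event = CommunicationEvent(
#     message="Revise the draft for clarity.",
#     background="Evaluators flagged readability issues.",
#     intermediate_output={"overall_score": 0.62},
# )

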
class TalkHier:
    """
    A hierarchical multi-agent system for content generation and refinement.

    Implements the TalkHier framework with structured communication protocols
    and hierarchical refinement processes.

    Attributes:
        max_iterations: Maximum number of refinement iterations
        quality_threshold: Minimum score required for content acceptance
        model_name: Name of the LLM model to use
        base_path: Path for saving agent states
    """

    def __init__(
        self,
        max_iterations: int = 3,
        quality_threshold: float = 0.8,
        model_name: str = "gpt-4",
        base_path: Optional[str] = None,
        return_string: bool = False,
    ):
        """Initialize the TalkHier system."""
        self.max_iterations = max_iterations
        self.quality_threshold = quality_threshold
        self.model_name = model_name
        self.return_string = return_string
        self.base_path = Path(base_path) if base_path else Path("./agent_states")
        self.base_path.mkdir(exist_ok=True)

        # Initialize agents
        self._init_agents()

        # Create conversation
        self.conversation = Conversation()

    def _safely_parse_json(self, json_str: str) -> Dict[str, Any]:
        """
        Safely parse a JSON string, handling various formats and potential errors.

        Args:
            json_str: String containing JSON data

        Returns:
            Parsed dictionary
        """
        try:
            # Try direct JSON parsing
            return json.loads(json_str)
        except json.JSONDecodeError:
            try:
                # Try extracting JSON from a potential text wrapper
                json_match = re.search(r"\{.*\}", json_str, re.DOTALL)
                if json_match:
                    return json.loads(json_match.group())
                # Try extracting from markdown code blocks
                code_block_match = re.search(
                    r"```(?:json)?\s*(\{.*?\})\s*```",
                    json_str,
                    re.DOTALL,
                )
                if code_block_match:
                    return json.loads(code_block_match.group(1))
            except Exception as e:
                logger.warning(f"Failed to extract JSON: {str(e)}")

        # Fallback: create a structured dict from the raw text
        return {
            "content": json_str,
            "metadata": {
                "parsed": False,
                "timestamp": str(datetime.now()),
            },
        }

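    # Illustrative behavior (the `talkhier` instance and strings below are
    # hypothetical): the greedy {.*} pass recovers a JSON object from
    # surrounding prose; because it also matches inside markdown fences, the
    # fenced-block pass only fires when the first search finds no braces.
    #
    # talkhier._safely_parse_json('Sure! {"scores": {"overall": 0.9}} Done.')
    # # -> {"scores": {"overall": 0.9}}
    # talkhier._safely_parse_json("plain text, no braces")
    # # -> {"content": "plain text, no braces", "metadata": {"parsed": False, ...}}
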
    def _init_agents(self) -> None:
        """Initialize all agents with their specific roles and prompts."""
        # Main supervisor agent
        self.main_supervisor = Agent(
            agent_name="Main-Supervisor",
            system_prompt=self._get_supervisor_prompt(),
            model_name=self.model_name,
            max_loops=1,
            saved_state_path=str(self.base_path / "main_supervisor.json"),
            verbose=True,
        )

        # Generator agent
        self.generator = Agent(
            agent_name="Content-Generator",
            system_prompt=self._get_generator_prompt(),
            model_name=self.model_name,
            max_loops=1,
            saved_state_path=str(self.base_path / "generator.json"),
            verbose=True,
        )

        # Evaluators
        self.evaluators = [
            Agent(
                agent_name=f"Evaluator-{i}",
                system_prompt=self._get_evaluator_prompt(i),
                model_name=self.model_name,
                max_loops=1,
                saved_state_path=str(self.base_path / f"evaluator_{i}.json"),
                verbose=True,
            )
            for i in range(3)
        ]

        # Revisor agent
        self.revisor = Agent(
            agent_name="Content-Revisor",
            system_prompt=self._get_revisor_prompt(),
            model_name=self.model_name,
            max_loops=1,
            saved_state_path=str(self.base_path / "revisor.json"),
            verbose=True,
        )

    def _get_supervisor_prompt(self) -> str:
        """Get the prompt for the supervisor agent."""
        return """You are a Supervisor agent responsible for orchestrating the content generation process. Your role is to analyze tasks, develop strategies, and coordinate other agents effectively.

You must carefully analyze each task to understand:
- The core objectives and requirements
- Target audience and their needs
- Complexity level and scope
- Any constraints or special considerations

Based on your analysis, develop a clear strategy that:
- Breaks down the task into manageable steps
- Identifies which agents are best suited for each step
- Anticipates potential challenges
- Sets clear success criteria

Output all responses in strict JSON format:
{
    "thoughts": {
        "task_analysis": "Detailed analysis of requirements, audience, scope, and constraints",
        "strategy": "Step-by-step plan including agent allocation and success metrics",
        "concerns": "Potential challenges, edge cases, and mitigation strategies"
    },
    "next_action": {
        "agent": "Specific agent to engage (Generator, Evaluator, or Revisor)",
        "instruction": "Detailed instructions including context, requirements, and expected output"
    }
}"""

    def _get_generator_prompt(self) -> str:
        """Get the prompt for the generator agent."""
        return """You are a Generator agent responsible for creating high-quality, original content. Your role is to produce content that is engaging, informative, and tailored to the target audience.

When generating content:
- Thoroughly research and fact-check all information
- Structure content logically with clear flow
- Use appropriate tone and language for the target audience
- Include relevant examples and explanations
- Ensure content is original and plagiarism-free
- Consider SEO best practices where applicable

Output all responses in strict JSON format:
{
    "content": {
        "main_body": "The complete generated content with proper formatting and structure",
        "metadata": {
            "word_count": "Accurate word count of main body",
            "target_audience": "Detailed audience description",
            "key_points": ["List of main points covered"],
            "sources": ["List of reference sources if applicable"],
            "readability_level": "Estimated reading level",
            "tone": "Description of content tone"
        }
    }
}"""

    def _get_evaluator_prompt(self, evaluator_id: int) -> str:
        """Get the prompt for an evaluator agent."""
        return f"""You are Evaluator {evaluator_id}, responsible for critically assessing content quality. Your evaluation must be thorough, objective, and constructive.

Evaluate content across multiple dimensions:
- Accuracy: factual correctness, source reliability
- Clarity: readability, organization, flow
- Coherence: logical consistency, argument structure
- Engagement: interest level, relevance
- Completeness: topic coverage, depth
- Technical quality: grammar, spelling, formatting
- Audience alignment: appropriate level and tone

Output all responses in strict JSON format:
{{
    "scores": {{
        "overall": "0.0-1.0 composite score",
        "categories": {{
            "accuracy": "0.0-1.0 score with evidence",
            "clarity": "0.0-1.0 score with examples",
            "coherence": "0.0-1.0 score with analysis",
            "engagement": "0.0-1.0 score with justification",
            "completeness": "0.0-1.0 score with gaps identified",
            "technical_quality": "0.0-1.0 score with issues noted",
            "audience_alignment": "0.0-1.0 score with reasoning"
        }}
    }},
    "feedback": [
        "Specific, actionable improvement suggestions",
        "Examples of issues found",
        "Recommendations for enhancement"
    ],
    "strengths": ["Notable positive aspects"],
    "weaknesses": ["Areas needing improvement"]
}}"""

    def _get_revisor_prompt(self) -> str:
        """Get the prompt for the revisor agent."""
        return """You are a Revisor agent responsible for improving content based on evaluator feedback. Your role is to enhance content while maintaining its core message and purpose.

When revising content:
- Address all evaluator feedback systematically
- Maintain consistency in tone and style
- Preserve accurate information
- Enhance clarity and flow
- Fix technical issues
- Optimize for target audience
- Track all changes made

Output all responses in strict JSON format:
{
    "revised_content": {
        "main_body": "Complete revised content incorporating all improvements",
        "metadata": {
            "word_count": "Updated word count",
            "changes_made": [
                "Detailed list of specific changes and improvements",
                "Reasoning for each major revision",
                "Feedback points addressed"
            ],
            "improvement_summary": "Overview of main enhancements",
            "preserved_elements": ["Key elements maintained from original"],
            "revision_approach": "Strategy used for revisions"
        }
    }
}"""

    def _evaluate_content(self, content: Union[str, Dict]) -> Dict[str, Any]:
        """
        Coordinate the evaluation of content across multiple evaluators.

        Args:
            content: Content to evaluate (string or dict)

        Returns:
            Combined evaluation results
        """
        try:
            # Ensure content is in the correct format
            content_dict = (
                self._safely_parse_json(content)
                if isinstance(content, str)
                else content
            )

            # Collect evaluations
            evaluations = []
            for evaluator in self.evaluators:
                try:
                    eval_response = evaluator.run(json.dumps(content_dict))

                    self.conversation.add(
                        role=evaluator.agent_name,
                        content=eval_response,
                    )

                    eval_data = self._safely_parse_json(eval_response)
                    evaluations.append(eval_data)
                except Exception as e:
                    logger.warning(f"Evaluator error: {str(e)}")
                    evaluations.append(self._get_fallback_evaluation())

            # Aggregate results
            return self._aggregate_evaluations(evaluations)

        except Exception as e:
            logger.error(f"Evaluation error: {str(e)}")
            return self._get_fallback_evaluation()

    def _get_fallback_evaluation(self) -> Dict[str, Any]:
        """Get a safe fallback evaluation result."""
        return {
            "scores": {
                "overall": 0.5,
                "categories": {
                    "accuracy": 0.5,
                    "clarity": 0.5,
                    "coherence": 0.5,
                },
            },
            "feedback": ["Evaluation failed"],
            "metadata": {
                "timestamp": str(datetime.now()),
                "is_fallback": True,
            },
        }

    def _aggregate_evaluations(self, evaluations: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Aggregate multiple evaluation results into a single evaluation.

        Args:
            evaluations: List of evaluation results

        Returns:
            Combined evaluation
        """
        # Calculate average scores
        overall_scores = []
        accuracy_scores = []
        clarity_scores = []
        coherence_scores = []
        all_feedback = []

        for eval_data in evaluations:
            try:
                scores = eval_data.get("scores", {})
                overall_scores.append(scores.get("overall", 0.5))

                categories = scores.get("categories", {})
                accuracy_scores.append(categories.get("accuracy", 0.5))
                clarity_scores.append(categories.get("clarity", 0.5))
                coherence_scores.append(categories.get("coherence", 0.5))

                all_feedback.extend(eval_data.get("feedback", []))
            except Exception as e:
                logger.warning(f"Error aggregating evaluation: {str(e)}")

        def safe_mean(scores: List[float]) -> float:
            return sum(scores) / len(scores) if scores else 0.5

        return {
            "scores": {
                "overall": safe_mean(overall_scores),
                "categories": {
                    "accuracy": safe_mean(accuracy_scores),
                    "clarity": safe_mean(clarity_scores),
                    "coherence": safe_mean(coherence_scores),
                },
            },
            "feedback": list(set(all_feedback)),  # Remove duplicates
            "metadata": {
                "evaluator_count": len(evaluations),
                "timestamp": str(datetime.now()),
            },
        }

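    # Worked example of the aggregation above (numbers are illustrative):
    # three evaluators returning overall scores of 0.9, 0.7, and 0.5
    # aggregate to safe_mean([0.9, 0.7, 0.5]) == 0.7, while an empty score
    # list (no usable evaluations) falls back to the neutral 0.5. This
    # assumes evaluators emit numeric scores; non-numeric values would make
    # sum() raise and be caught upstream in _evaluate_content.
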
    def run(self, task: str) -> Union[Dict[str, Any], str]:
        """
        Generate and iteratively refine content based on the given task.

        Args:
            task: Content generation task description

        Returns:
            Dictionary containing final content and metadata, or the
            conversation history as a string if return_string is set
        """
        logger.info(f"Starting content generation for task: {task}")

        try:
            # Get initial direction from the supervisor
            supervisor_response = self.main_supervisor.run(task)

            self.conversation.add(
                role=self.main_supervisor.agent_name,
                content=supervisor_response,
            )

            supervisor_data = self._safely_parse_json(supervisor_response)

            # Generate initial content
            generator_response = self.generator.run(
                json.dumps(supervisor_data.get("next_action", {}))
            )

            self.conversation.add(
                role=self.generator.agent_name,
                content=generator_response,
            )

            current_content = self._safely_parse_json(generator_response)

            for iteration in range(self.max_iterations):
                logger.info(f"Starting iteration {iteration + 1}")

                # Evaluate the current content
                evaluation = self._evaluate_content(current_content)

                # Check if the quality threshold is met
                if evaluation["scores"]["overall"] >= self.quality_threshold:
                    logger.info("Quality threshold met, returning content")
                    return {
                        "content": current_content.get("content", {}).get("main_body", ""),
                        "final_score": evaluation["scores"]["overall"],
                        "iterations": iteration + 1,
                        "metadata": {
                            "content_metadata": current_content.get("content", {}).get("metadata", {}),
                            "evaluation": evaluation,
                        },
                    }

                # Revise content if needed
                revision_input = {
                    "content": current_content,
                    "evaluation": evaluation,
                }

                revision_response = self.revisor.run(json.dumps(revision_input))
                revised = self._safely_parse_json(revision_response)
                # The revisor nests its output under "revised_content";
                # normalize it back to the generator's "content" shape so the
                # reads above and below keep working
                if "revised_content" in revised:
                    revised = {"content": revised["revised_content"]}
                current_content = revised

                self.conversation.add(
                    role=self.revisor.agent_name,
                    content=revision_response,
                )

            logger.warning("Max iterations reached without meeting quality threshold")

        except Exception as e:
            logger.error(f"Error in run: {str(e)}")
            current_content = {"content": {"main_body": f"Error: {str(e)}"}}
            evaluation = self._get_fallback_evaluation()

        if self.return_string:
            return self.conversation.return_history_as_string()
        else:
            return {
                "content": current_content.get("content", {}).get("main_body", ""),
                "final_score": evaluation["scores"]["overall"],
                "iterations": self.max_iterations,
                "metadata": {
                    "content_metadata": current_content.get("content", {}).get("metadata", {}),
                    "evaluation": evaluation,
                    "error": "Max iterations reached",
                },
            }

    def save_state(self) -> None:
        """Save the current state of all agents."""
        for agent in [
            self.main_supervisor,
            self.generator,
            *self.evaluators,
            self.revisor,
        ]:
            try:
                agent.save_state()
            except Exception as e:
                logger.error(f"Error saving state for {agent.agent_name}: {str(e)}")

    def load_state(self) -> None:
        """Load the saved state of all agents."""
        for agent in [
            self.main_supervisor,
            self.generator,
            *self.evaluators,
            self.revisor,
        ]:
            try:
                agent.load_state()
            except Exception as e:
                logger.error(f"Error loading state for {agent.agent_name}: {str(e)}")


if __name__ == "__main__":
    # Example usage
    try:
        talkhier = TalkHier(
            max_iterations=1,
            quality_threshold=0.8,
            model_name="gpt-4o",
            return_string=True,
        )

        task = "Write a comprehensive explanation of quantum computing for beginners"
        result = talkhier.run(task)
        print(result)

        # print(f"Final content: {result['content']}")
        # print(f"Quality score: {result['final_score']}")
        # print(f"Iterations: {result['iterations']}")

    except Exception as e:
        logger.error(f"Error in main execution: {str(e)}")