Merge pull request #783 from harshalmore31/talk_hier

feat: Implement TalkHier framework for hierarchical multi-agent content generation
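A minimal usage sketch (the import path is illustrative, since it depends on where this module lands in the package, and an OpenAI-compatible API key is assumed to be configured for the swarms Agent):

```python
from talkhier import TalkHier  # hypothetical import path

talkhier = TalkHier(max_iterations=2, quality_threshold=0.85, model_name="gpt-4o")
result = talkhier.run("Write a short technical overview of vector databases.")
print(result["content"])       # final main-body text
print(result["final_score"])   # aggregated evaluator score in [0.0, 1.0]
print(result["iterations"])    # number of refinement passes used
```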
Kye Gomez committed 2 months ago (via GitHub)
commit 49b65e4d07

@ -0,0 +1,682 @@
"""
TalkHier: A hierarchical multi-agent framework for content generation and refinement.
Implements structured communication and evaluation protocols.
"""
import json
import logging
import re
from dataclasses import asdict, dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from concurrent.futures import ThreadPoolExecutor
from swarms import Agent
from swarms.structs.conversation import Conversation
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class AgentRole(Enum):
"""Defines the possible roles for agents in the system."""
SUPERVISOR = "supervisor"
GENERATOR = "generator"
EVALUATOR = "evaluator"
REVISOR = "revisor"
@dataclass
class CommunicationEvent:
"""Represents a structured communication event between agents."""
message: str
background: Optional[str] = None
intermediate_output: Optional[Dict[str, Any]] = None
sender: str = ""
receiver: str = ""
    # default_factory ensures each event gets its own creation timestamp,
    # rather than one evaluated once at class-definition time
    timestamp: str = field(default_factory=lambda: str(datetime.now()))
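# Illustrative use: events are serialized into the Conversation log with
# dataclasses.asdict, e.g.
#   asdict(CommunicationEvent(message="draft ready",
#                             sender="Content-Generator",
#                             receiver="Main-Supervisor"))
# yielding a dict with message, background, intermediate_output, sender, receiver,
# and a per-instance timestamp string.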
class TalkHier:
"""
A hierarchical multi-agent system for content generation and refinement.
Implements the TalkHier framework with structured communication protocols
and hierarchical refinement processes.
Attributes:
max_iterations: Maximum number of refinement iterations
quality_threshold: Minimum score required for content acceptance
model_name: Name of the LLM model to use
base_path: Path for saving agent states
"""
def __init__(
self,
max_iterations: int = 3,
quality_threshold: float = 0.8,
model_name: str = "gpt-4",
base_path: Optional[str] = None,
return_string: bool = False,
):
"""Initialize the TalkHier system."""
self.max_iterations = max_iterations
self.quality_threshold = quality_threshold
self.model_name = model_name
self.return_string = return_string
self.base_path = (
Path(base_path) if base_path else Path("./agent_states")
)
        self.base_path.mkdir(parents=True, exist_ok=True)
# Initialize agents
self._init_agents()
# Create conversation
self.conversation = Conversation()
def _safely_parse_json(self, json_str: str) -> Dict[str, Any]:
"""
Safely parse JSON string, handling various formats and potential errors.
Args:
json_str: String containing JSON data
Returns:
Parsed dictionary
"""
try:
# Try direct JSON parsing
return json.loads(json_str)
except json.JSONDecodeError:
try:
                # Try extracting JSON from a potential text wrapper
                json_match = re.search(r"\{.*\}", json_str, re.DOTALL)
                if json_match:
                    return json.loads(json_match.group())
# Try extracting from markdown code blocks
code_block_match = re.search(
r"```(?:json)?\s*(\{.*?\})\s*```",
json_str,
re.DOTALL,
)
                if code_block_match:
return json.loads(code_block_match.group(1))
except Exception as e:
logger.warning(f"Failed to extract JSON: {str(e)}")
# Fallback: create structured dict from text
return {
"content": json_str,
"metadata": {
"parsed": False,
"timestamp": str(datetime.now()),
},
}
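    # Illustrative parsing behaviour (results shown after the arrow):
    #   '{"a": 1}'                    -> {"a": 1}
    #   'Sure, here it is: {"a": 1}'  -> {"a": 1}   (JSON pulled out of the wrapper text)
    #   '```json\n{"a": 1}\n```'      -> {"a": 1}   (JSON pulled out of a fenced block)
    # Anything unparseable falls back to {"content": <raw text>, "metadata": {"parsed": False, ...}}.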
def _get_criteria_generator_prompt(self) -> str:
"""Get the prompt for the criteria generator agent."""
return """You are a Criteria Generator agent responsible for creating task-specific evaluation criteria.
Analyze the task and generate appropriate evaluation criteria based on:
- Task type and complexity
- Required domain knowledge
- Target audience expectations
- Quality requirements
Output all responses in strict JSON format:
{
"criteria": {
"criterion_name": {
"description": "Detailed description of what this criterion measures",
"importance": "Weight from 0.0-1.0 indicating importance",
"evaluation_guide": "Guidelines for how to evaluate this criterion"
}
},
"metadata": {
"task_type": "Classification of the task type",
"complexity_level": "Assessment of task complexity",
"domain_focus": "Primary domain or field of the task"
}
}"""
def _init_agents(self) -> None:
"""Initialize all agents with their specific roles and prompts."""
# Main supervisor agent
self.main_supervisor = Agent(
agent_name="Main-Supervisor",
system_prompt=self._get_supervisor_prompt(),
model_name=self.model_name,
max_loops=1,
saved_state_path=str(
self.base_path / "main_supervisor.json"
),
verbose=True,
)
# Generator agent
self.generator = Agent(
agent_name="Content-Generator",
system_prompt=self._get_generator_prompt(),
model_name=self.model_name,
max_loops=1,
saved_state_path=str(self.base_path / "generator.json"),
verbose=True,
)
# Criteria Generator agent
self.criteria_generator = Agent(
agent_name="Criteria-Generator",
system_prompt=self._get_criteria_generator_prompt(),
model_name=self.model_name,
max_loops=1,
saved_state_path=str(self.base_path / "criteria_generator.json"),
verbose=True,
)
# Evaluators without criteria (will be set during run)
self.evaluators = []
for i in range(3):
self.evaluators.append(
Agent(
agent_name=f"Evaluator-{i}",
system_prompt=self._get_evaluator_prompt(i),
model_name=self.model_name,
max_loops=1,
saved_state_path=str(
self.base_path / f"evaluator_{i}.json"
),
verbose=True,
)
)
# Revisor agent
self.revisor = Agent(
agent_name="Content-Revisor",
system_prompt=self._get_revisor_prompt(),
model_name=self.model_name,
max_loops=1,
saved_state_path=str(self.base_path / "revisor.json"),
verbose=True,
)
def _generate_dynamic_criteria(self, task: str) -> Dict[str, str]:
"""
Generate dynamic evaluation criteria based on the task.
Args:
task: Content generation task description
Returns:
Dictionary containing dynamic evaluation criteria
"""
# Example dynamic criteria generation logic
if "technical" in task.lower():
return {
"accuracy": "Technical correctness and source reliability",
"clarity": "Readability and logical structure",
"depth": "Comprehensive coverage of technical details",
"engagement": "Interest level and relevance to the audience",
"technical_quality": "Grammar, spelling, and formatting",
}
else:
return {
"accuracy": "Factual correctness and source reliability",
"clarity": "Readability and logical structure",
"coherence": "Logical consistency and argument structure",
"engagement": "Interest level and relevance to the audience",
"completeness": "Coverage of the topic and depth",
"technical_quality": "Grammar, spelling, and formatting",
}
def _get_supervisor_prompt(self) -> str:
"""Get the prompt for the supervisor agent."""
return """You are a Supervisor agent responsible for orchestrating the content generation process and selecting the best evaluation criteria.
You must:
1. Analyze tasks and develop strategies
2. Review multiple evaluator feedback
3. Select the most appropriate evaluation based on:
- Completeness of criteria
- Relevance to task
- Quality of feedback
4. Provide clear instructions for content revision
Output all responses in strict JSON format:
{
"thoughts": {
"task_analysis": "Analysis of requirements, audience, scope",
"strategy": "Step-by-step plan and success metrics",
"evaluation_selection": {
"chosen_evaluator": "ID of selected evaluator",
"reasoning": "Why this evaluation was chosen",
"key_criteria": ["List of most important criteria"]
}
},
"next_action": {
"agent": "Next agent to engage",
"instruction": "Detailed instructions with context"
}
}"""
def _get_generator_prompt(self) -> str:
"""Get the prompt for the generator agent."""
return """You are a Generator agent responsible for creating high-quality, original content. Your role is to produce content that is engaging, informative, and tailored to the target audience.
When generating content:
- Thoroughly research and fact-check all information
- Structure content logically with clear flow
- Use appropriate tone and language for the target audience
- Include relevant examples and explanations
- Ensure content is original and plagiarism-free
- Consider SEO best practices where applicable
Output all responses in strict JSON format:
{
"content": {
"main_body": "The complete generated content with proper formatting and structure",
"metadata": {
"word_count": "Accurate word count of main body",
"target_audience": "Detailed audience description",
"key_points": ["List of main points covered"],
"sources": ["List of reference sources if applicable"],
"readability_level": "Estimated reading level",
"tone": "Description of content tone"
}
}
}"""
def _get_evaluator_prompt(self, evaluator_id: int) -> str:
"""Get the base prompt for an evaluator agent."""
return f"""You are Evaluator {evaluator_id}, responsible for critically assessing content quality. Your evaluation must be thorough, objective, and constructive.
When receiving content to evaluate:
1. First analyze the task description to determine appropriate evaluation criteria
2. Generate specific criteria based on task requirements
3. Evaluate content against these criteria
4. Provide detailed feedback for each criterion
Output all responses in strict JSON format:
{{
"generated_criteria": {{
"criteria_name": "description of what this criterion measures",
// Add more criteria based on task analysis
}},
"scores": {{
"overall": "0.0-1.0 composite score",
"categories": {{
// Scores for each generated criterion
"criterion_name": "0.0-1.0 score with evidence"
}}
}},
"feedback": [
"Specific, actionable improvement suggestions per criterion"
],
"strengths": ["Notable positive aspects"],
"weaknesses": ["Areas needing improvement"]
}}"""
def _get_revisor_prompt(self) -> str:
"""Get the prompt for the revisor agent."""
return """You are a Revisor agent responsible for improving content based on evaluator feedback. Your role is to enhance content while maintaining its core message and purpose.
When revising content:
- Address all evaluator feedback systematically
- Maintain consistency in tone and style
- Preserve accurate information
- Enhance clarity and flow
- Fix technical issues
- Optimize for target audience
- Track all changes made
Output all responses in strict JSON format:
{
"revised_content": {
"main_body": "Complete revised content incorporating all improvements",
"metadata": {
"word_count": "Updated word count",
"changes_made": [
"Detailed list of specific changes and improvements",
"Reasoning for each major revision",
"Feedback points addressed"
],
"improvement_summary": "Overview of main enhancements",
"preserved_elements": ["Key elements maintained from original"],
"revision_approach": "Strategy used for revisions"
}
}
}"""
def _generate_criteria_for_task(self, task: str) -> Dict[str, Any]:
"""Generate evaluation criteria for the given task."""
try:
criteria_input = {
"task": task,
"instruction": "Generate specific evaluation criteria for this task."
}
criteria_response = self.criteria_generator.run(json.dumps(criteria_input))
self.conversation.add(
role="Criteria-Generator",
content=criteria_response
)
return self._safely_parse_json(criteria_response)
except Exception as e:
logger.error(f"Error generating criteria: {str(e)}")
return {"criteria": {}}
def _create_comm_event(self, sender: Agent, receiver: Agent, response: Dict) -> CommunicationEvent:
"""Create a structured communication event between agents."""
return CommunicationEvent(
message=response.get("message", ""),
background=response.get("background", ""),
intermediate_output=response.get("intermediate_output", {}),
sender=sender.agent_name,
receiver=receiver.agent_name,
)
def _evaluate_content(self, content: Union[str, Dict], task: str) -> Dict[str, Any]:
"""Coordinate evaluation process with parallel evaluator execution."""
try:
content_dict = self._safely_parse_json(content) if isinstance(content, str) else content
criteria_data = self._generate_criteria_for_task(task)
def run_evaluator(evaluator, eval_input):
response = evaluator.run(json.dumps(eval_input))
return {
"evaluator_id": evaluator.agent_name,
"evaluation": self._safely_parse_json(response)
}
eval_inputs = [{
"task": task,
"content": content_dict,
"criteria": criteria_data.get("criteria", {})
} for _ in self.evaluators]
with ThreadPoolExecutor() as executor:
evaluations = list(executor.map(
lambda x: run_evaluator(*x),
zip(self.evaluators, eval_inputs)
))
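            # Note: all three evaluators run concurrently; each receives the same
            # task, parsed content, and generated criteria, and their JSON verdicts
            # are synthesized by the Main-Supervisor below.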
supervisor_input = {
"evaluations": evaluations,
"task": task,
"instruction": "Synthesize feedback"
}
supervisor_response = self.main_supervisor.run(json.dumps(supervisor_input))
aggregated_eval = self._safely_parse_json(supervisor_response)
# Track communication
comm_event = self._create_comm_event(
self.main_supervisor,
self.revisor,
aggregated_eval
)
self.conversation.add(
role="Communication",
content=json.dumps(asdict(comm_event))
)
return aggregated_eval
except Exception as e:
logger.error(f"Evaluation error: {str(e)}")
return self._get_fallback_evaluation()
def _get_fallback_evaluation(self) -> Dict[str, Any]:
"""Get a safe fallback evaluation result."""
return {
"scores": {
"overall": 0.5,
"categories": {
"accuracy": 0.5,
"clarity": 0.5,
"coherence": 0.5,
},
},
"feedback": ["Evaluation failed"],
"metadata": {
"timestamp": str(datetime.now()),
"is_fallback": True,
},
}
def _aggregate_evaluations(
self, evaluations: List[Dict[str, Any]]
) -> Dict[str, Any]:
"""Aggregate multiple evaluation results into a single evaluation."""
try:
# Collect all unique criteria from evaluators
all_criteria = set()
for eval_data in evaluations:
categories = eval_data.get("scores", {}).get("categories", {})
all_criteria.update(categories.keys())
# Initialize score aggregation
aggregated_scores = {criterion: [] for criterion in all_criteria}
overall_scores = []
all_feedback = []
# Collect scores and feedback
for eval_data in evaluations:
scores = eval_data.get("scores", {})
overall_scores.append(scores.get("overall", 0.5))
categories = scores.get("categories", {})
for criterion in all_criteria:
if criterion in categories:
aggregated_scores[criterion].append(
categories.get(criterion, 0.5)
)
all_feedback.extend(eval_data.get("feedback", []))
# Calculate means
def safe_mean(scores: List[float]) -> float:
return sum(scores) / len(scores) if scores else 0.5
return {
"scores": {
"overall": safe_mean(overall_scores),
"categories": {
criterion: safe_mean(scores)
for criterion, scores in aggregated_scores.items()
},
},
"feedback": list(set(all_feedback)),
"metadata": {
"evaluator_count": len(evaluations),
"criteria_used": list(all_criteria),
"timestamp": str(datetime.now()),
},
}
except Exception as e:
logger.error(f"Error in evaluation aggregation: {str(e)}")
return self._get_fallback_evaluation()
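    # Illustrative aggregation (assuming evaluators return numeric scores): two
    # evaluations with overall scores 0.8 and 0.6 and category scores
    # {"clarity": 0.9} and {"clarity": 0.7} aggregate to overall 0.7 and
    # clarity 0.8, with de-duplicated feedback and evaluator_count metadata attached.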
def _evaluate_and_revise(
self, content: Union[str, Dict], task: str
) -> Dict[str, Any]:
"""Coordinate evaluation and revision process."""
try:
# Get evaluations and supervisor selection
evaluation_result = self._evaluate_content(content, task)
            # Extract the selected evaluation and supervisor reasoning; fall back to
            # the full result / "thoughts" block if the supervisor omits these keys
            selected_evaluation = (
                evaluation_result.get("selected_evaluation") or evaluation_result
            )
            supervisor_reasoning = (
                evaluation_result.get("supervisor_reasoning")
                or evaluation_result.get("thoughts", {})
            )
# Prepare revision input with selected evaluation
revision_input = {
"content": content,
"evaluation": selected_evaluation,
"supervisor_feedback": supervisor_reasoning,
"instruction": "Revise the content based on the selected evaluation feedback"
}
# Get revision from content generator
revision_response = self.generator.run(json.dumps(revision_input))
revised_content = self._safely_parse_json(revision_response)
return {
"content": revised_content,
"evaluation": evaluation_result
}
except Exception as e:
logger.error(f"Evaluation and revision error: {str(e)}")
return {
"content": content,
"evaluation": self._get_fallback_evaluation()
}
    def run(self, task: str) -> Union[str, Dict[str, Any]]:
        """
        Generate and iteratively refine content based on the given task.
        Args:
            task: Content generation task description
        Returns:
            Dictionary containing the final content and metadata, or the
            conversation history as a string when return_string is True
        """
logger.info(f"Starting content generation for task: {task}")
try:
# Get initial direction from supervisor
supervisor_response = self.main_supervisor.run(task)
self.conversation.add(
role=self.main_supervisor.agent_name,
content=supervisor_response,
)
supervisor_data = self._safely_parse_json(
supervisor_response
)
# Generate initial content
generator_response = self.generator.run(
json.dumps(supervisor_data.get("next_action", {}))
)
self.conversation.add(
role=self.generator.agent_name,
content=generator_response,
)
current_content = self._safely_parse_json(
generator_response
)
for iteration in range(self.max_iterations):
logger.info(f"Starting iteration {iteration + 1}")
# Evaluate and revise content
result = self._evaluate_and_revise(current_content, task)
evaluation = result["evaluation"]
current_content = result["content"]
                # Check if quality threshold is met (fall back to the aggregated
                # scores if no "selected_evaluation" block was returned)
                selected_eval = evaluation.get("selected_evaluation") or evaluation
                overall_score = selected_eval.get("scores", {}).get("overall", 0.0)
if overall_score >= self.quality_threshold:
logger.info("Quality threshold met, returning content")
return {
"content": current_content.get("content", {}).get("main_body", ""),
"final_score": overall_score,
"iterations": iteration + 1,
"metadata": {
"content_metadata": current_content.get("content", {}).get("metadata", {}),
"evaluation": evaluation,
},
}
# Add to conversation history
self.conversation.add(
role=self.generator.agent_name,
content=json.dumps(current_content),
)
logger.warning(
"Max iterations reached without meeting quality threshold"
)
except Exception as e:
logger.error(f"Error in generate_and_refine: {str(e)}")
current_content = {
"content": {"main_body": f"Error: {str(e)}"}
}
evaluation = self._get_fallback_evaluation()
if self.return_string:
return self.conversation.return_history_as_string()
else:
return {
"content": current_content.get("content", {}).get(
"main_body", ""
),
"final_score": evaluation["scores"]["overall"],
"iterations": self.max_iterations,
"metadata": {
"content_metadata": current_content.get(
"content", {}
).get("metadata", {}),
"evaluation": evaluation,
"error": "Max iterations reached",
},
}
def save_state(self) -> None:
"""Save the current state of all agents."""
for agent in [
self.main_supervisor,
self.generator,
*self.evaluators,
self.revisor,
]:
try:
agent.save_state()
except Exception as e:
logger.error(
f"Error saving state for {agent.agent_name}: {str(e)}"
)
def load_state(self) -> None:
"""Load the saved state of all agents."""
for agent in [
self.main_supervisor,
self.generator,
*self.evaluators,
self.revisor,
]:
try:
agent.load_state()
except Exception as e:
logger.error(
f"Error loading state for {agent.agent_name}: {str(e)}"
)
if __name__ == "__main__":
try:
talkhier = TalkHier(
max_iterations=1,
quality_threshold=0.8,
model_name="gpt-4o",
return_string=False,
)
        # Ask for user input and print the refined result
        task = input("Enter the content generation task description: ")
        result = talkhier.run(task)
        print(json.dumps(result, indent=2, default=str))
except Exception as e:
logger.error(f"Error in main execution: {str(e)}")