diff --git a/examples/single_agent/reasoning_agent_examples/agent_judge_evaluation_criteria_example.py b/examples/single_agent/reasoning_agent_examples/agent_judge_evaluation_criteria_example.py
deleted file mode 100644
index f8a1b044..00000000
--- a/examples/single_agent/reasoning_agent_examples/agent_judge_evaluation_criteria_example.py
+++ /dev/null
@@ -1,100 +0,0 @@
-"""
-Agent Judge with Evaluation Criteria Example
-
-This example demonstrates how to use the AgentJudge with custom evaluation criteria.
-The evaluation_criteria parameter allows specifying different criteria with weights 
-for more targeted and customizable evaluation of agent outputs.
-"""
-
-from swarms.agents.agent_judge import AgentJudge
-import os
-from dotenv import load_dotenv
-
-load_dotenv()
-
-# Example 1: Basic usage with evaluation criteria
-print("\n=== Example 1: Using Custom Evaluation Criteria ===\n")
-
-# Create an AgentJudge with custom evaluation criteria
-judge = AgentJudge(
-    model_name="claude-3-7-sonnet-20250219",  # Use any available model
-    evaluation_criteria={
-        "correctness": 0.5,
-        "problem_solving_approach": 0.3, 
-        "explanation_clarity": 0.2
-    }
-)
-
-# Sample output to evaluate
-task_response = [
-    "Task: Determine the time complexity of a binary search algorithm and explain your reasoning.\n\n"
-    "Agent response: The time complexity of binary search is O(log n). In each step, "
-    "we divide the search space in half, resulting in a logarithmic relationship between "
-    "the input size and the number of operations. This can be proven by solving the "
-    "recurrence relation T(n) = T(n/2) + O(1), which gives us T(n) = O(log n)."
-]
-
-# Run evaluation
-evaluation = judge.run(task_response)
-print(evaluation[0])
-
-# Example 2: Specialized criteria for code evaluation
-print("\n=== Example 2: Code Evaluation with Specialized Criteria ===\n")
-
-code_judge = AgentJudge(
-    model_name="claude-3-7-sonnet-20250219",
-    agent_name="code_judge",
-    evaluation_criteria={
-        "code_correctness": 0.4,
-        "code_efficiency": 0.3,
-        "code_readability": 0.3
-    }
-)
-
-# Sample code to evaluate
-code_response = [
-    "Task: Write a function to find the maximum subarray sum in an array of integers.\n\n"
-    "Agent response:\n```python\n"
-    "def max_subarray_sum(arr):\n"
-    "    current_sum = max_sum = arr[0]\n"
-    "    for i in range(1, len(arr)):\n"
-    "        current_sum = max(arr[i], current_sum + arr[i])\n"
-    "        max_sum = max(max_sum, current_sum)\n"
-    "    return max_sum\n\n"
-    "# Example usage\n"
-    "print(max_subarray_sum([-2, 1, -3, 4, -1, 2, 1, -5, 4]))  # Output: 6 (subarray [4, -1, 2, 1])\n"
-    "```\n"
-    "This implementation uses Kadane's algorithm which has O(n) time complexity and "
-    "O(1) space complexity, making it optimal for this problem."
-]
-
-code_evaluation = code_judge.run(code_response)
-print(code_evaluation[0])
-
-# Example 3: Comparing multiple responses
-print("\n=== Example 3: Comparing Multiple Agent Responses ===\n")
-
-comparison_judge = AgentJudge(
-    model_name="claude-3-7-sonnet-20250219",
-    evaluation_criteria={
-        "accuracy": 0.6,
-        "completeness": 0.4
-    }
-)
-
-multiple_responses = comparison_judge.run([
-    "Task: Explain the CAP theorem in distributed systems.\n\n"
-    "Agent A response: CAP theorem states that a distributed system cannot simultaneously "
-    "provide Consistency, Availability, and Partition tolerance. In practice, you must "
-    "choose two out of these three properties.",
-    
-    "Task: Explain the CAP theorem in distributed systems.\n\n"
-    "Agent B response: The CAP theorem, formulated by Eric Brewer, states that in a "
-    "distributed data store, you can only guarantee two of the following three properties: "
-    "Consistency (all nodes see the same data at the same time), Availability (every request "
-    "receives a response), and Partition tolerance (the system continues to operate despite "
-    "network failures). Most modern distributed systems choose to sacrifice consistency in "
-    "favor of availability and partition tolerance, implementing eventual consistency models instead."
-])
-
-print(multiple_responses[0])
\ No newline at end of file
diff --git a/swarms/agents/agent_judge.py b/swarms/agents/agent_judge.py
index 9a5c1e3a..cb33bd87 100644
--- a/swarms/agents/agent_judge.py
+++ b/swarms/agents/agent_judge.py
@@ -1,415 +1,119 @@
-import traceback
-from typing import List, Optional, Union, Dict
-import uuid
+from typing import List
 
 from swarms.prompts.agent_judge_prompt import AGENT_JUDGE_PROMPT
 from swarms.structs.agent import Agent
 from swarms.structs.conversation import Conversation
 from swarms.utils.any_to_str import any_to_str
 
-class AgentJudgeInitializationError(Exception):
-    """
-    Exception raised when there is an error initializing the AgentJudge.
-    """
-    pass
-
-class AgentJudgeExecutionError(Exception):
-    """
-    Exception raised when there is an error executing the AgentJudge.
-    """
-    pass
+from loguru import logger
 
-class AgentJudgeFeedbackCycleError(Exception):
-    """
-    Exception raised when there is an error in the feedback cycle.
-    """
-    pass
 
 class AgentJudge:
     """
-    A specialized agent designed to evaluate and judge outputs from other agents or systems.
-    The AgentJudge acts as a quality control mechanism, providing objective assessments
-    and feedback on various types of content, decisions, or outputs. It's based on research
-    in LLM-based evaluation systems and can maintain context across multiple evaluations.
-    This implementation supports both single task evaluation and batch processing with
-    iterative refinement capabilities.
+    A class to represent an agent judge that processes tasks and generates responses.
 
     Attributes:
-        id (str): Unique identifier for the judge agent instance.
         agent_name (str): The name of the agent judge.
-        system_prompt (str): The system prompt for the agent containing evaluation instructions.
-        model_name (str): The model name used for generating evaluations (e.g., "openai/o1", "gpt-4").
+        system_prompt (str): The system prompt for the agent.
+        model_name (str): The model name used for generating responses.
         conversation (Conversation): An instance of the Conversation class to manage conversation history.
-        max_loops (int): The maximum number of evaluation iterations to run.
-        verbose (bool): Whether to enable verbose logging.
-        agent (Agent): An instance of the Agent class that performs the evaluation execution.
-        evaluation_criteria (Dict[str, float]): Dictionary of evaluation criteria and their weights.
-
-    Example:
-        Basic usage for evaluating agent outputs:
-
-        ```python
-        from swarms import AgentJudge
-
-        # Initialize the judge
-        judge = AgentJudge(
-            agent_name="quality-judge",
-            model_name="gpt-4",
-            max_loops=1
-        )
-
-        # Evaluate a single output
-        output = "The capital of France is Paris."
-        evaluation = judge.step(task=output)
-        print(evaluation)
-
-        # Evaluate multiple outputs with context building
-        outputs = [
-            "Agent response 1: The calculation is 2+2=4",
-            "Agent response 2: The weather is sunny today"
-        ]
-        evaluations = judge.run(tasks=outputs)
-        ```
+        max_loops (int): The maximum number of iterations to run the tasks.
+        agent (Agent): An instance of the Agent class that performs the task execution.
 
     Methods:
-        step(task: str = None, tasks: List[str] = None, img: str = None) -> str:
-            Processes a single task or list of tasks and returns the agent's evaluation.
-        run(task: str = None, tasks: List[str] = None, img: str = None) -> List[str]:
-            Executes evaluation in a loop with context building, collecting responses.
-        run_batched(tasks: List[str] = None, imgs: List[str] = None) -> List[str]:
-            Executes batch evaluation of tasks with corresponding images.
+        step(tasks: List[str]) -> str:
+            Processes a list of tasks and returns the agent's response.
+
+        run(tasks: List[str]) -> List[str]:
+            Executes the tasks in a loop, updating context and collecting responses.
     """
 
     def __init__(
         self,
-        id: str = str(uuid.uuid4()),
-        agent_name: str = "Agent Judge",
-        description: str = "You're an expert AI agent judge. Carefully review the following output(s) generated by another agent. Your job is to provide a detailed, constructive, and actionable critique that will help the agent improve its future performance.",
+        agent_name: str = "agent-judge-01",
         system_prompt: str = AGENT_JUDGE_PROMPT,
         model_name: str = "openai/o1",
         max_loops: int = 1,
-        verbose: bool = False,
-        evaluation_criteria: Optional[Dict[str, float]] = None,
-        *args,
-        **kwargs,
-    ):
-        self.id = id
+    ) -> None:
+        """
+        Initializes the AgentJudge with the specified parameters.
+
+        Args:
+            agent_name (str): The name of the agent judge.
+            system_prompt (str): The system prompt for the agent.
+            model_name (str): The model name used for generating responses.
+            max_loops (int): The maximum number of iterations to run the tasks.
+        """
         self.agent_name = agent_name
         self.system_prompt = system_prompt
         self.model_name = model_name
         self.conversation = Conversation(time_enabled=False)
         self.max_loops = max_loops
-        self.verbose = verbose
-        self.evaluation_criteria = evaluation_criteria or {}
-        
-        # Enhance system prompt with evaluation criteria if provided
-        enhanced_prompt = system_prompt
-        if self.evaluation_criteria:
-            criteria_str = "\n\nEvaluation Criteria:\n"
-            for criterion, weight in self.evaluation_criteria.items():
-                criteria_str += f"- {criterion}: weight = {weight}\n"
-            enhanced_prompt += criteria_str
 
         self.agent = Agent(
             agent_name=agent_name,
-            agent_description=description,
-            system_prompt=enhanced_prompt,
+            agent_description="You're the agent judge",
+            system_prompt=system_prompt,
             model_name=model_name,
             max_loops=1,
-            *args,
-            **kwargs,
         )
 
-    def feedback_cycle_step(
-        self,
-        agent: Union[Agent, callable],
-        task: str,
-        img: Optional[str] = None,
-    ):
-        try:
-            # First run the main agent
-            agent_output = agent.run(task=task, img=img)
-
-            # Then run the judge agent
-            judge_output = self.run(task=agent_output, img=img)
-
-            # Run the main agent again with the judge's feedback, using a much improved prompt
-            improved_prompt = (
-                f"You have received the following detailed feedback from the expert agent judge ({self.agent_name}):\n\n"
-                f"--- FEEDBACK START ---\n{judge_output}\n--- FEEDBACK END ---\n\n"
-                f"Your task is to thoughtfully revise and enhance your previous output based on this critique. "
-                f"Carefully address all identified weaknesses, incorporate the suggestions, and strive to maximize the strengths noted. "
-                f"Be specific, accurate, and actionable in your improvements. "
-                f"Here is the original task for reference:\n\n"
-                f"--- TASK ---\n{task}\n--- END TASK ---\n\n"
-                f"Please provide your improved and fully revised output below."
-            )
-
-            return agent.run(task=improved_prompt, img=img)
-        except Exception as e:
-            raise AgentJudgeFeedbackCycleError(
-                f"Error In Agent Judge Feedback Cycle: {e} Traceback: {traceback.format_exc()}"
-            )
-
-    def feedback_cycle(
-        self,
-        agent: Union[Agent, callable],
-        task: str,
-        img: Optional[str] = None,
-        loops: int = 1,
-    ):
-        loop = 0
-        original_task = task  # Preserve the original task
-        current_output = None  # Track the current output
-        all_outputs = []  # Collect all outputs from each iteration
-
-        while loop < loops:
-            # First iteration: run the standard feedback cycle step
-            current_output = self.feedback_cycle_step(
-                agent, original_task, img
-            )
-
-            # Add the current output to our collection
-            all_outputs.append(current_output)
-            loop += 1
-
-        return all_outputs
-
-    def step(
-        self,
-        task: str = None,
-        tasks: Optional[List[str]] = None,
-        img: Optional[str] = None,
-    ) -> str:
+    def step(self, tasks: List[str]) -> str:
         """
-        Processes a single task or list of tasks and returns the agent's evaluation.
-        This method performs a one-shot evaluation of the provided content. It takes
-        either a single task string or a list of tasks and generates a comprehensive
-        evaluation with strengths, weaknesses, and improvement suggestions.
+        Processes a list of tasks and returns the agent's response.
 
         Args:
-            task (str, optional): A single task/output to be evaluated.
-            tasks (List[str], optional): A list of tasks/outputs to be evaluated.
-            img (str, optional): Path to an image file for multimodal evaluation.
+            tasks (List[str]): A list of tasks to be processed.
 
         Returns:
-            str: A detailed evaluation response from the agent including:
-                - Strengths: What the agent/output did well
-                - Weaknesses: Areas that need improvement
-                - Suggestions: Specific recommendations for improvement
-                - Factual accuracy assessment
-
-        Raises:
-            ValueError: If neither task nor tasks are provided.
-
-        Example:
-            ```python
-            # Single task evaluation
-            evaluation = judge.step(task="The answer is 42.")
-
-            # Multiple tasks evaluation
-            evaluation = judge.step(tasks=[
-                "Response 1: Paris is the capital of France",
-                "Response 2: 2 + 2 = 5"  # Incorrect
-            ])
-
-            # Multimodal evaluation
-            evaluation = judge.step(
-                task="Describe this image",
-                img="path/to/image.jpg"
-            )
-            ```
+            str: The response generated by the agent.
         """
-        try:
-            prompt = ""
-            if tasks:
-                prompt = any_to_str(tasks)
-            elif task:
-                prompt = task
-            else:
-                raise ValueError("No tasks or task provided")
-                
-            # 添加评估标准到任务描述中
-            task_instruction = "You are an expert AI agent judge. Carefully review the following output(s) generated by another agent. "
-            task_instruction += "Your job is to provide a detailed, constructive, and actionable critique that will help the agent improve its future performance. "
-            task_instruction += "Your feedback should address the following points:\n"
-            task_instruction += "1. Strengths: What did the agent do well? Highlight any correct reasoning, clarity, or effective problem-solving.\n"
-            task_instruction += "2. Weaknesses: Identify any errors, omissions, unclear reasoning, or areas where the output could be improved.\n"
-            task_instruction += "3. Suggestions: Offer specific, practical recommendations for how the agent can improve its next attempt. "
-            task_instruction += "This may include advice on reasoning, structure, completeness, or style.\n"
-            task_instruction += "4. If relevant, point out any factual inaccuracies or logical inconsistencies.\n"
-            
-            # 在任务说明中添加评估标准
-            if self.evaluation_criteria:
-                criteria_names = list(self.evaluation_criteria.keys())
-                task_instruction += "\nPlease use these specific evaluation criteria with their respective weights:\n"
-                for criterion, weight in self.evaluation_criteria.items():
-                    task_instruction += f"- {criterion}: weight = {weight}\n"
-                    
-            task_instruction += "Be thorough, objective, and professional. Your goal is to help the agent learn and produce better results in the future.\n\n"
-            task_instruction += f"Output(s) to evaluate:\n{prompt}\n"
+        prompt = any_to_str(tasks)
+        logger.debug(f"Running step with prompt: {prompt}")
 
-            response = self.agent.run(
-                task=task_instruction,
-                img=img,
-            )
+        # Ask the judge agent to evaluate the outputs.
 
-            return response
-        except Exception as e:
-            error_message = (
-                f"AgentJudge encountered an error: {e}\n"
-                f"Traceback:\n{traceback.format_exc()}\n\n"
-                "If this issue persists, please:\n"
-                "- Open a GitHub issue: https://github.com/swarms-ai/swarms/issues\n"
-                "- Join our Discord for real-time support: swarms.ai\n"
-                "- Or book a call: https://cal.com/swarms\n"
-            )
-            raise AgentJudgeExecutionError(error_message)
+        response = self.agent.run(
+            task=f"Evaluate the following output or outputs: {prompt}"
+        )
+        logger.debug(f"Received response: {response}")
 
-    def run(
-        self,
-        task: str = None,
-        tasks: Optional[List[str]] = None,
-        img: Optional[str] = None,
-    ):
+        return response
+
+    def run(self, tasks: List[str]) -> List[str]:
         """
-        Executes evaluation in multiple iterations with context building and refinement.
-        This method runs the evaluation process for the specified number of max_loops,
-        where each iteration builds upon the previous context. This allows for iterative
-        refinement of evaluations and deeper analysis over multiple passes.
+        Executes the tasks in a loop, updating context and collecting responses.
 
         Args:
-            task (str, optional): A single task/output to be evaluated.
-            tasks (List[str], optional): A list of tasks/outputs to be evaluated.
-            img (str, optional): Path to an image file for multimodal evaluation.
+            tasks (List[str]): A list of tasks to be executed.
 
         Returns:
-            List[str]: A list of evaluation responses, one for each iteration.
-                      Each subsequent evaluation includes context from previous iterations.
-
-        Example:
-            ```python
-            # Single task with iterative refinement
-            judge = AgentJudge(max_loops=3)
-            evaluations = judge.run(task="Agent output to evaluate")
-            # Returns 3 evaluations, each building on the previous
-
-            # Multiple tasks with context building
-            evaluations = judge.run(tasks=[
-                "First agent response",
-                "Second agent response"
-            ])
-
-            # With image analysis
-            evaluations = judge.run(
-                task="Analyze this chart",
-                img="chart.png"
-            )
-            ```
-
-        Note:
-            - The first iteration evaluates the original task(s)
-            - Subsequent iterations include context from previous evaluations
-            - This enables deeper analysis and refinement of judgments
-            - Useful for complex evaluations requiring multiple perspectives
+            List[str]: A list of responses generated by the agent for each iteration.
         """
-        try:
-            responses = []
-            context = ""
-
-            # Convert single task to list for consistent processing
-            if task and not tasks:
-                tasks = [task]
-                task = None  # Clear to avoid confusion in step method
-
-            for _ in range(self.max_loops):
-                # Add context to the tasks if available
-                if context and tasks:
-                    contextualized_tasks = [
-                        f"Previous context: {context}\nTask: {t}"
-                        for t in tasks
-                    ]
-                else:
-                    contextualized_tasks = tasks
-
-                # Get response for current iteration
-                current_response = self.step(
-                    task=task,
-                    tasks=contextualized_tasks,
-                    img=img,
-                )
-
-                responses.append(current_response)
-
-                # Update context for next iteration
-                context = current_response
+        responses = []
+        context = ""
+
+        for _ in range(self.max_loops):
+            # Add context to the tasks if available
+            if context:
+                contextualized_tasks = [
+                    f"Previous context: {context}\nTask: {task}"
+                    for task in tasks
+                ]
+            else:
+                contextualized_tasks = tasks
 
-            return responses
-        except Exception as e:
-            error_message = (
-                f"AgentJudge encountered an error: {e}\n"
-                f"Traceback:\n{traceback.format_exc()}\n\n"
-                "If this issue persists, please:\n"
-                "- Open a GitHub issue: https://github.com/swarms-ai/swarms/issues\n"
-                "- Join our Discord for real-time support: swarms.ai\n"
-                "- Or book a call: https://cal.com/swarms\n"
+            # Get response for current iteration
+            current_response = self.step(contextualized_tasks)
+            responses.append(current_response)
+            logger.debug(
+                f"Current response added: {current_response}"
             )
-            raise AgentJudgeExecutionError(error_message)
-
-    def run_batched(
-        self,
-        tasks: Optional[List[str]] = None,
-        imgs: Optional[List[str]] = None,
-    ):
-        """
-        Executes batch evaluation of multiple tasks with corresponding images.
-        This method processes multiple task-image pairs independently, where each
-        task can be evaluated with its corresponding image. Unlike the run() method,
-        this doesn't build context between different tasks - each is evaluated
-        independently.
-
-        Args:
-            tasks (List[str], optional): A list of tasks/outputs to be evaluated.
-            imgs (List[str], optional): A list of image paths corresponding to each task.
-                                       Must be the same length as tasks if provided.
 
-        Returns:
-            List[List[str]]: A list of evaluation responses for each task. Each inner
-                           list contains the responses from all iterations (max_loops)
-                           for that particular task.
-
-        Example:
-            ```python
-            # Batch evaluation with images
-            tasks = [
-                "Describe what you see in this image",
-                "What's wrong with this chart?",
-                "Analyze the trends shown"
-            ]
-            images = [
-                "photo1.jpg",
-                "chart1.png",
-                "graph1.png"
-            ]
-            evaluations = judge.run_batched(tasks=tasks, imgs=images)
-            # Returns evaluations for each task-image pair
+            # Update context for next iteration
+            context = current_response
 
-            # Batch evaluation without images
-            evaluations = judge.run_batched(tasks=[
-                "Agent response 1",
-                "Agent response 2",
-                "Agent response 3"
-            ])
-            ```
+            self.conversation.add(self.agent_name, current_response)
+            logger.debug("Added message to conversation history.")
 
-        Note:
-            - Each task is processed independently
-            - If imgs is provided, it must have the same length as tasks
-            - Each task goes through max_loops iterations independently
-            - No context is shared between different tasks in the batch
-        """
-        responses = []
-        for task, img in zip(tasks, imgs):
-            response = self.run(task=task, img=img)
-            responses.append(response)
-        return responses
\ No newline at end of file
+        return responses