diff --git a/examples/single_agent/reasoning_agent_examples/agent_judge_evaluation_criteria_example.py b/examples/single_agent/reasoning_agent_examples/agent_judge_evaluation_criteria_example.py
new file mode 100644
index 00000000..f8a1b044
--- /dev/null
+++ b/examples/single_agent/reasoning_agent_examples/agent_judge_evaluation_criteria_example.py
@@ -0,0 +1,100 @@
+"""
+Agent Judge with Evaluation Criteria Example
+
+This example demonstrates how to use the AgentJudge with custom evaluation criteria.
+The evaluation_criteria parameter allows specifying different criteria with weights
+for more targeted and customizable evaluation of agent outputs.
+"""
+
+from swarms.agents.agent_judge import AgentJudge
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Example 1: Basic usage with evaluation criteria
+print("\n=== Example 1: Using Custom Evaluation Criteria ===\n")
+
+# Create an AgentJudge with custom evaluation criteria
+judge = AgentJudge(
+    model_name="claude-3-7-sonnet-20250219",  # Use any available model
+    evaluation_criteria={
+        "correctness": 0.5,
+        "problem_solving_approach": 0.3,
+        "explanation_clarity": 0.2
+    }
+)
+
+# Sample output to evaluate
+task_response = [
+    "Task: Determine the time complexity of a binary search algorithm and explain your reasoning.\n\n"
+    "Agent response: The time complexity of binary search is O(log n). In each step, "
+    "we divide the search space in half, resulting in a logarithmic relationship between "
+    "the input size and the number of operations. This can be proven by solving the "
+    "recurrence relation T(n) = T(n/2) + O(1), which gives us T(n) = O(log n)."
+]
+
+# Run evaluation
+evaluation = judge.run(task_response)
+print(evaluation[0])
+
+# Example 2: Specialized criteria for code evaluation
+print("\n=== Example 2: Code Evaluation with Specialized Criteria ===\n")
+
+code_judge = AgentJudge(
+    model_name="claude-3-7-sonnet-20250219",
+    agent_name="code_judge",
+    evaluation_criteria={
+        "code_correctness": 0.4,
+        "code_efficiency": 0.3,
+        "code_readability": 0.3
+    }
+)
+
+# Sample code to evaluate
+code_response = [
+    "Task: Write a function to find the maximum subarray sum in an array of integers.\n\n"
+    "Agent response:\n```python\n"
+    "def max_subarray_sum(arr):\n"
+    "    current_sum = max_sum = arr[0]\n"
+    "    for i in range(1, len(arr)):\n"
+    "        current_sum = max(arr[i], current_sum + arr[i])\n"
+    "        max_sum = max(max_sum, current_sum)\n"
+    "    return max_sum\n\n"
+    "# Example usage\n"
+    "print(max_subarray_sum([-2, 1, -3, 4, -1, 2, 1, -5, 4]))  # Output: 6 (subarray [4, -1, 2, 1])\n"
+    "```\n"
+    "This implementation uses Kadane's algorithm which has O(n) time complexity and "
+    "O(1) space complexity, making it optimal for this problem."
+]
+
+code_evaluation = code_judge.run(code_response)
+print(code_evaluation[0])
+
+# Example 3: Comparing multiple responses
+print("\n=== Example 3: Comparing Multiple Agent Responses ===\n")
+
+comparison_judge = AgentJudge(
+    model_name="claude-3-7-sonnet-20250219",
+    evaluation_criteria={
+        "accuracy": 0.6,
+        "completeness": 0.4
+    }
+)
+
+multiple_responses = comparison_judge.run([
+    "Task: Explain the CAP theorem in distributed systems.\n\n"
+    "Agent A response: CAP theorem states that a distributed system cannot simultaneously "
+    "provide Consistency, Availability, and Partition tolerance. In practice, you must "
+    "choose two out of these three properties.",
+
+    "Task: Explain the CAP theorem in distributed systems.\n\n"
+    "Agent B response: The CAP theorem, formulated by Eric Brewer, states that in a "
+    "distributed data store, you can only guarantee two of the following three properties: "
+    "Consistency (all nodes see the same data at the same time), Availability (every request "
+    "receives a response), and Partition tolerance (the system continues to operate despite "
+    "network failures). Most modern distributed systems choose to sacrifice consistency in "
+    "favor of availability and partition tolerance, implementing eventual consistency models instead."
+])
+
+print(multiple_responses[0])
\ No newline at end of file
diff --git a/swarms/agents/agent_judge.py b/swarms/agents/agent_judge.py
index 566125d1..5a8742e8 100644
--- a/swarms/agents/agent_judge.py
+++ b/swarms/agents/agent_judge.py
@@ -1,5 +1,7 @@
 import traceback
-from typing import List, Optional, Union
+
+from typing import List, Optional, Union, Dict
+
 import uuid
 
 from swarms.prompts.agent_judge_prompt import AGENT_JUDGE_PROMPT
@@ -15,23 +17,20 @@ class AgentJudgeInitializationError(Exception):
 
     pass
 
-
 class AgentJudgeExecutionError(Exception):
     """
     Exception raised when there is an error executing the AgentJudge.
     """
 
-    pass
+    pass
 
 
 class AgentJudgeFeedbackCycleError(Exception):
     """
     Exception raised when there is an error in the feedback cycle.
    """
 
-    pass
-
 
 class AgentJudge:
     """
     A specialized agent designed to evaluate and judge outputs from other agents or systems.
@@ -53,6 +52,8 @@ class AgentJudge:
         verbose (bool): Whether to enable verbose logging.
         agent (Agent): An instance of the Agent class that performs the evaluation execution.
+        evaluation_criteria (Dict[str, float]): Dictionary of evaluation criteria and their weights.
+
 
     Example:
         Basic usage for evaluating agent outputs:
 
@@ -82,7 +83,6 @@ class AgentJudge:
     Methods:
         step(task: str = None, tasks: List[str] = None, img: str = None) -> str:
             Processes a single task or list of tasks and returns the agent's evaluation.
-
         run(task: str = None, tasks: List[str] = None, img: str = None) -> List[str]:
             Executes evaluation in a loop with context building, collecting responses.
 
@@ -99,6 +99,9 @@ class AgentJudge:
         model_name: str = "openai/o1",
         max_loops: int = 1,
         verbose: bool = False,
+
+        evaluation_criteria: Optional[Dict[str, float]] = None,
+
         *args,
         **kwargs,
     ):
@@ -110,10 +113,23 @@ class AgentJudge:
         self.max_loops = max_loops
         self.verbose = verbose
 
+        self.evaluation_criteria = evaluation_criteria or {}
+
+        # Enhance system prompt with evaluation criteria if provided
+        enhanced_prompt = system_prompt
+        if self.evaluation_criteria:
+            criteria_str = "\n\nEvaluation Criteria:\n"
+            for criterion, weight in self.evaluation_criteria.items():
+                criteria_str += f"- {criterion}: weight = {weight}\n"
+            enhanced_prompt += criteria_str
+
+
         self.agent = Agent(
             agent_name=agent_name,
             agent_description=description,
-            system_prompt=AGENT_JUDGE_PROMPT,
+
+            system_prompt=enhanced_prompt,
+
             model_name=model_name,
             max_loops=1,
             *args,
@@ -144,6 +160,7 @@ class AgentJudge:
                 f"--- TASK ---\n{task}\n--- END TASK ---\n\n"
                 f"Please provide your improved and fully revised output below."
             )
+
             return agent.run(task=improved_prompt, img=img)
         except Exception as e:
             raise AgentJudgeFeedbackCycleError(
@@ -207,6 +224,7 @@ class AgentJudge:
 
             # Single task evaluation
             evaluation = judge.step(task="The answer is 42.")
+
             # Multiple tasks evaluation
             evaluation = judge.step(tasks=[
                 "Response 1: Paris is the capital of France",
@@ -228,20 +246,29 @@ class AgentJudge:
             prompt = task
         else:
             raise ValueError("No tasks or task provided")
+
+        # Add the evaluation criteria to the task description
+        task_instruction = "You are an expert AI agent judge. Carefully review the following output(s) generated by another agent. "
+        task_instruction += "Your job is to provide a detailed, constructive, and actionable critique that will help the agent improve its future performance. "
+        task_instruction += "Your feedback should address the following points:\n"
+        task_instruction += "1. Strengths: What did the agent do well? Highlight any correct reasoning, clarity, or effective problem-solving.\n"
+        task_instruction += "2. Weaknesses: Identify any errors, omissions, unclear reasoning, or areas where the output could be improved.\n"
+        task_instruction += "3. Suggestions: Offer specific, practical recommendations for how the agent can improve its next attempt. "
+        task_instruction += "This may include advice on reasoning, structure, completeness, or style.\n"
+        task_instruction += "4. If relevant, point out any factual inaccuracies or logical inconsistencies.\n"
+
+        # Append the evaluation criteria to the task instruction
+        if self.evaluation_criteria:
+            criteria_names = list(self.evaluation_criteria.keys())
+            task_instruction += "\nPlease use these specific evaluation criteria with their respective weights:\n"
+            for criterion, weight in self.evaluation_criteria.items():
+                task_instruction += f"- {criterion}: weight = {weight}\n"
+
+        task_instruction += "Be thorough, objective, and professional. Your goal is to help the agent learn and produce better results in the future.\n\n"
+        task_instruction += f"Output(s) to evaluate:\n{prompt}\n"
 
         response = self.agent.run(
-            task=(
-                "You are an expert AI agent judge. Carefully review the following output(s) generated by another agent. "
-                "Your job is to provide a detailed, constructive, and actionable critique that will help the agent improve its future performance. "
-                "Your feedback should address the following points:\n"
-                "1. Strengths: What did the agent do well? Highlight any correct reasoning, clarity, or effective problem-solving.\n"
-                "2. Weaknesses: Identify any errors, omissions, unclear reasoning, or areas where the output could be improved.\n"
-                "3. Suggestions: Offer specific, practical recommendations for how the agent can improve its next attempt. "
-                "This may include advice on reasoning, structure, completeness, or style.\n"
-                "4. If relevant, point out any factual inaccuracies or logical inconsistencies.\n"
-                "Be thorough, objective, and professional. Your goal is to help the agent learn and produce better results in the future.\n\n"
-                f"Output(s) to evaluate:\n{prompt}\n"
-            ),
+            task=task_instruction,
             img=img,
         )
 
@@ -330,6 +357,7 @@ class AgentJudge:
                 tasks=contextualized_tasks,
                 img=img,
             )
+
            responses.append(current_response)
 
             # Update context for next iteration
@@ -360,6 +388,7 @@ class AgentJudge:
         this doesn't build context between different tasks - each is evaluated
         independently.
+
 
         Args:
             tasks (List[str], optional): A list of tasks/outputs to be evaluated.
             imgs (List[str], optional): A list of image paths corresponding to each task.
@@ -370,6 +399,7 @@ class AgentJudge:
                 list contains the responses from all iterations (max_loops) for that particular task.
+
 
         Example:
             ```python
             # Batch evaluation with images
             evaluations = judge.run_batched(
@@ -383,7 +413,6 @@ class AgentJudge:
                 "chart1.png",
                 "graph1.png"
             ]
-
             evaluations = judge.run_batched(tasks=tasks, imgs=images)
             # Returns evaluations for each task-image pair
 
@@ -395,6 +424,7 @@ class AgentJudge:
             ])
             ```
 
+
         Note:
             - Each task is processed independently
            - If imgs is provided, it must have the same length as tasks
@@ -405,4 +435,6 @@ class AgentJudge:
         for task, img in zip(tasks, imgs):
             response = self.run(task=task, img=img)
             responses.append(response)
+
         return responses
+
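
Reviewer note: the sketch below isolates the criteria-weighting behaviour added in __init__ above so it can be exercised without instantiating AgentJudge. The helper name build_judge_prompt and the base prompt string are illustrative stand-ins (the class itself appends to AGENT_JUDGE_PROMPT), not part of the patch.

from typing import Dict

def build_judge_prompt(system_prompt: str, evaluation_criteria: Dict[str, float]) -> str:
    """Append weighted evaluation criteria to a judge system prompt, mirroring __init__."""
    if not evaluation_criteria:
        return system_prompt
    criteria_str = "\n\nEvaluation Criteria:\n"
    for criterion, weight in evaluation_criteria.items():
        criteria_str += f"- {criterion}: weight = {weight}\n"
    return system_prompt + criteria_str

# Prompt the judge agent would receive for Example 1 in the new example file:
print(build_judge_prompt(
    "You are an expert AI agent judge.",  # stand-in for AGENT_JUDGE_PROMPT
    {"correctness": 0.5, "problem_solving_approach": 0.3, "explanation_clarity": 0.2},
))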