From c43180de149bb95e32195958b734e7ccff9ec870 Mon Sep 17 00:00:00 2001
From: Kye Gomez
Date: Sun, 11 May 2025 01:43:51 -0700
Subject: [PATCH] agent multi step reasoning update

---
 example.py                          |  14 ++--
 swarms/prompts/max_loop_prompt.py   |  48 ++++++++++++
 swarms/prompts/react_base_prompt.py |  41 ++++++++++
 swarms/structs/agent.py             | 113 +++++++++++-----------------
 4 files changed, 137 insertions(+), 79 deletions(-)
 create mode 100644 swarms/prompts/max_loop_prompt.py
 create mode 100644 swarms/prompts/react_base_prompt.py

diff --git a/example.py b/example.py
index 5985d9c6..ec70ecfc 100644
--- a/example.py
+++ b/example.py
@@ -1,20 +1,16 @@
 from swarms.structs.agent import Agent
-from swarms.prompts.finance_agent_sys_prompt import (
-    FINANCIAL_AGENT_SYS_PROMPT,
-)

 # Initialize the agent
 agent = Agent(
     agent_name="Financial-Analysis-Agent",
     agent_description="Personal finance advisor agent",
-    system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
-    max_loops=2,
+    max_loops=4,
     model_name="gpt-4o-mini",
     dynamic_temperature_enabled=True,
     interactive=False,
-    output_type="dict",
+    output_type="all",
 )

-print(
-    agent.run("Conduct an analysis of the best real undervalued ETFs")
-)
+out = agent.run("Conduct an analysis of the best real undervalued ETFs")
+# print(out)
+# print(type(out))

diff --git a/swarms/prompts/max_loop_prompt.py b/swarms/prompts/max_loop_prompt.py
new file mode 100644
index 00000000..2e92c52c
--- /dev/null
+++ b/swarms/prompts/max_loop_prompt.py
@@ -0,0 +1,48 @@
+def generate_reasoning_prompt(max_loops: int) -> str:
+    # You are a deliberate, step-by-step reasoning agent designed to solve complex problems
+    # through iterative reasoning loops.
+
+    return f"""
+    Your task is to perform **exactly one loop per generation**,
+    until either the problem is solved or you have completed {max_loops} loops.
+
+    ## Instructions:
+
+    - In this generation, perform loop number {{current_loop}} out of {max_loops}.
+    - **Do not perform more than one loop in a single generation.**
+    - Use the **maximum token budget** available to explore, reason, and reflect.
+    - Output must **end** with:
+      - `### End of Loop {{current_loop}}`
+    - **Do not proceed to loop {{current_loop + 1}}** unless explicitly prompted again.
+
+    ## Loop Structure (per generation):
+
+    1. **Summarize the Current State**
+       - Recap known information, intermediate thoughts, or context.
+
+    2. **Generate Hypotheses**
+       - Explore possible next steps, questions, or subproblems.
+
+    3. **Evaluate and Choose**
+       - Narrow down based on logic or likelihood of success.
+
+    4. **Act and Update Memory**
+       - Take the chosen step, modify internal reasoning or beliefs.
+
+    5. **Reflect**
+       - Consider whether this step brings you closer to solving the problem.
+       - Suggest whether to continue, backtrack, or finalize.
+
+    ## Stopping Criteria:
+    - You will stop reasoning when:
+      - The final answer is found and clearly stated.
+      - {max_loops} loops have been completed.
+      - You conclude that continued reasoning won't help.
+
+    In the final loop (loop {max_loops}), output your final solution as:
+
+    **Final Answer:**
+
+    Be methodical, reflective, and token-efficient. Use all available room to think in detail.
+    Do not rush to conclusions. Each loop is isolated and should be treated as its own generation.
+    """
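
As a quick sanity check, the new helper can be exercised on its own. A minimal sketch, assuming only the module path introduced above:

    from swarms.prompts.max_loop_prompt import generate_reasoning_prompt

    # Render the loop-control prompt for a 4-loop run. The {{current_loop}}
    # escapes in the f-string survive as literal {current_loop} placeholders,
    # to be filled in by the caller once per generation.
    prompt = generate_reasoning_prompt(max_loops=4)
    print(prompt)
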
diff --git a/swarms/prompts/react_base_prompt.py b/swarms/prompts/react_base_prompt.py
new file mode 100644
index 00000000..80573b36
--- /dev/null
+++ b/swarms/prompts/react_base_prompt.py
@@ -0,0 +1,41 @@
+REACT_SYS_PROMPT = """You are a thoughtful and methodical AI agent. You solve problems through careful reasoning and by using external tools when needed. You use a "Thought → Action → Observation" loop, repeating as many times as needed to build your understanding and solve the problem. Your goal is not just to answer correctly, but to demonstrate clear reasoning and adaptability.
+
+Follow this structure:
+
+---
+
+Question: [The user’s input]
+
+Thought 1: Understand the question. What is being asked? Break it down into sub-parts. What knowledge or information might be required?
+
+Thought 2: Form a plan. Decide what steps to take. Which facts should be recalled? Which need to be looked up? Which tools should be used?
+
+Action 1: [Use a tool, such as Search[query], Lookup[entity], Calculator[expression], or even Plan[...] if you need to set subgoals]
+Observation 1: [The result from the tool]
+
+Thought 3: Reflect on the observation. What did you learn? What do you now know or still not know? Update your plan if needed.
+
+Action 2: [Next tool or operation]
+Observation 2: [...]
+
+...
+
+[Repeat Thought → Action → Observation as needed]
+
+Thought N: You now have all the necessary information. Synthesize what you know. Reconstruct the answer clearly, and justify it.
+
+Action N: Finish[final_answer]
+
+---
+
+Guidelines for Reasoning:
+- Always **start by interpreting the problem carefully.**
+- If the question is complex, **break it into parts** and tackle each.
+- **Think before you act.** Plan actions deliberately, not reflexively.
+- Use **search engines** or **lookup tools** for facts, definitions, or current events.
+- Use a **calculator** for numerical operations.
+- Use **Reflection** steps if your observations are unclear, surprising, or contradictory.
+- Don't rush to finish — **reasoning is more important than speed.**
+- When concluding, make sure your **answer is fully supported** by earlier steps.
+
+"""
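
This prompt is wired into Agent through the new react_on flag (see the swarms/structs/agent.py diff below). A minimal usage sketch, reusing the constructor arguments already shown in example.py; the agent name and task here are illustrative only:

    from swarms.structs.agent import Agent

    # react_on=True appends REACT_SYS_PROMPT to the agent's system prompt
    # before short-term memory is initialized.
    agent = Agent(
        agent_name="ReAct-Demo-Agent",  # hypothetical name
        model_name="gpt-4o-mini",
        max_loops=1,
        react_on=True,
        interactive=False,
    )
    agent.run("What is the capital of France?")
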
diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py
index 770fb096..d137999a 100644
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@@ -65,6 +65,8 @@ from swarms.utils.litellm_tokenizer import count_tokens
 from swarms.utils.litellm_wrapper import LiteLLM
 from swarms.utils.pdf_to_text import pdf_to_text
 from swarms.utils.str_to_dict import str_to_dict
+from swarms.prompts.react_base_prompt import REACT_SYS_PROMPT
+from swarms.prompts.max_loop_prompt import generate_reasoning_prompt


 # Utils
@@ -302,7 +304,7 @@
         saved_state_path: Optional[str] = None,
         autosave: Optional[bool] = False,
         context_length: Optional[int] = 8192,
-        user_name: Optional[str] = "Human:",
+        user_name: Optional[str] = "Human",
         self_healing_enabled: Optional[bool] = False,
         code_interpreter: Optional[bool] = False,
         multi_modal: Optional[bool] = None,
@@ -396,6 +398,7 @@
         mcp_servers: MCPServerSseParams = None,
         mcp_url: str = None,
         mcp_urls: List[str] = None,
+        react_on: bool = False,
         *args,
         **kwargs,
     ):
@@ -518,12 +521,13 @@
         self.mcp_servers = mcp_servers
         self.mcp_url = mcp_url
         self.mcp_urls = mcp_urls
+        self.react_on = react_on

         self._cached_llm = (
             None  # Add this line to cache the LLM instance
         )

-        self.short_memory = self.short_memory_init()
+        # self.short_memory = self.short_memory_init()

         # Initialize the feedback
         self.feedback = []
@@ -555,6 +559,16 @@
         if self.mcp_url or self.mcp_servers is not None:
             self.add_mcp_tools_to_memory()

+        if self.react_on is True:
+            self.system_prompt += REACT_SYS_PROMPT
+
+        if self.max_loops > 1:
+            self.system_prompt += generate_reasoning_prompt(
+                self.max_loops
+            )
+
+        self.short_memory = self.short_memory_init()
+
     def short_memory_init(self):
         if (
             self.agent_name is not None
@@ -575,33 +589,6 @@

         return self.short_memory

-    def init_handling(self):
-        # Define tasks as pairs of (function, condition)
-        # Each task will only run if its condition is True
-        self.setup_config()
-
-        if exists(self.docs_folder):
-            self.get_docs_from_doc_folders()
-
-        if exists(self.tools):
-            self.handle_tool_init()
-
-        if exists(self.tool_schema) or exists(self.list_base_models):
-            self.handle_tool_schema_ops()
-
-        if exists(self.sop) or exists(self.sop_list):
-            self.handle_sop_ops()
-
-        # Run sequential operations after all concurrent tasks are done
-        # self.agent_output = self.agent_output_model()
-        log_agent_data(self.to_dict())
-
-        if self.llm is None:
-            self.llm = self.llm_handling()
-
-        if self.mcp_url or self.mcp_servers is not None:
-            self.add_mcp_tools_to_memory()
-
     def agent_output_model(self):
         # Many steps
         id = agent_id()
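
Note the ordering the constructor changes establish: the ReAct and loop-reasoning prompts are appended to system_prompt first, and short_memory_init() runs only afterwards, so the conversation is seeded with the fully combined prompt. A standalone sketch of that assembly, with placeholder strings standing in for the real prompt constants:

    # Placeholders stand in for the real prompt constants; the control
    # flow mirrors the constructor logic added above.
    system_prompt = "<base agent prompt>"
    react_on, max_loops = True, 4

    if react_on:
        system_prompt += "<REACT_SYS_PROMPT>"
    if max_loops > 1:
        system_prompt += f"<reasoning prompt for {max_loops} loops>"

    # Only now would short-term memory be created, seeded with the
    # assembled system_prompt.
    print(system_prompt)
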
@@ -1021,24 +1008,13 @@
             agent(task="What is the capital of France?", img="path/to/image.jpg", is_last=True)
         """
         try:
-            # 1. Batch process initial setup
-            setup_tasks = [
-                lambda: self.check_if_no_prompt_then_autogenerate(
-                    task
-                ),
-                lambda: self.short_memory.add(
-                    role=self.user_name, content=task
-                ),
-                lambda: (
-                    self.plan(task) if self.plan_enabled else None
-                ),
-            ]

-            # Execute setup tasks concurrently
-            with ThreadPoolExecutor(
-                max_workers=len(setup_tasks)
-            ) as executor:
-                executor.map(lambda f: f(), setup_tasks)
+            self.check_if_no_prompt_then_autogenerate(task)
+
+            self.short_memory.add(role=self.user_name, content=task)
+
+            if self.plan_enabled:
+                self.plan(task)

             # Set the loop count
             loop_count = 0
@@ -1068,10 +1044,18 @@
                 ):
                     loop_count += 1

-                # self.short_memory.add(
-                #     role=f"{self.agent_name}",
-                #     content=f"Internal Reasoning Loop: {loop_count} of {self.max_loops}",
-                # )
+                if self.max_loops > 1:
+                    self.short_memory.add(
+                        role=self.agent_name,
+                        content=f"Current Internal Reasoning Loop: {loop_count}/{self.max_loops}",
+                    )
+
+                # If it is the final loop, then add the final loop message
+                if loop_count == self.max_loops:
+                    self.short_memory.add(
+                        role=self.agent_name,
+                        content=f"🎉 Final Internal Reasoning Loop: {loop_count}/{self.max_loops}. Prepare your comprehensive response.",
+                    )

                 # Dynamic temperature
                 if self.dynamic_temperature_enabled is True:
@@ -1148,7 +1132,11 @@
                                     self.streaming_on,
                                 )

-                            out = self.llm.run(out)
+                            out = self.call_llm(task=out)
+
+                            self.short_memory.add(
+                                role=self.agent_name, content=out
+                            )

                             if self.no_print is False:
                                 agent_print(
@@ -1158,10 +1146,6 @@
                                     self.streaming_on,
                                 )

-                            self.short_memory.add(
-                                role=self.agent_name, content=out
-                            )
-
                             self.sentiment_and_evaluator(response)

                             success = True  # Mark as successful to exit the retry loop
@@ -1219,7 +1203,7 @@
                         break

                     self.short_memory.add(
-                        role="User", content=user_input
+                        role=self.user_name, content=user_input
                     )

                 if self.loop_interval:
@@ -1233,21 +1217,10 @@

                 self.save()

-            # log_agent_data(self.to_dict())
-
-            # if self.autosave is True:
-            #     self.save()
-
-            # 14. Batch final operations
-            final_tasks = [
-                lambda: log_agent_data(self.to_dict()),
-                lambda: self.save() if self.autosave else None,
-            ]
+            log_agent_data(self.to_dict())

-            with ThreadPoolExecutor(
-                max_workers=len(final_tasks)
-            ) as executor:
-                executor.map(lambda f: f(), final_tasks)
+            if self.autosave:
+                self.save()

             return history_output_formatter(
                 self.short_memory, type=self.output_type
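
For a feel of the per-loop breadcrumbs the run loop now records, a toy reproduction of just that message flow; a plain list stands in for the agent's short-term memory, and the strings mirror the patch:

    agent_name, max_loops = "Financial-Analysis-Agent", 4
    memory = []  # stand-in for the agent's short-term memory

    for loop_count in range(1, max_loops + 1):
        if max_loops > 1:
            memory.append(
                (agent_name, f"Current Internal Reasoning Loop: {loop_count}/{max_loops}")
            )
        if loop_count == max_loops:
            memory.append(
                (
                    agent_name,
                    f"🎉 Final Internal Reasoning Loop: {loop_count}/{max_loops}. "
                    "Prepare your comprehensive response.",
                )
            )

    for role, content in memory:
        print(f"{role}: {content}")
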