diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py
index 12d2306a..357afb25 100644
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@@ -1052,34 +1052,90 @@ class Agent:
                             )
                             self.memory_query(task_prompt)
 
-                        if img is not None:
-                            response = self.call_llm(
-                                task=task_prompt,
-                                img=img,
-                                *args,
-                                **kwargs,
-                            )
-                        else:
-                            response = self.call_llm(
-                                task=task_prompt, *args, **kwargs
-                            )
-
-                        # Parse the response from the agent with the output type
-                        if exists(self.tools_list_dictionary):
-                            if isinstance(response, BaseModel):
-                                response = response.model_dump()
-
-                        # Parse the response from the agent with the output type
-                        response = self.parse_llm_output(response)
+                        # This variable will hold the full, structured response object from the LLM
+                        response_obj = None
+
+                        # Check if streaming is enabled for this run
+                        if self.streaming_on:
+                            # This utility is needed to rebuild the full response after streaming is complete
+                            from litellm import stream_chunk_builder
+
+                            # Call our wrapper, which will now return a stream/generator
+                            if img is not None:
+                                response_stream = self.call_llm(
+                                    task=task_prompt, img=img, *args, **kwargs
+                                )
+                            else:
+                                response_stream = self.call_llm(
+                                    task=task_prompt, *args, **kwargs
+                                )
 
+                            # Prepare to collect the streaming chunks
+                            collected_chunks = []
+                            if not self.no_print:
+                                # --- FIX: Manually print a header for clarity ---
+                                print(f"--- Streaming Response from {self.agent_name} [Loop: {loop_count}] ---")
+                                print(f"{self.agent_name}: ", end="")
+                            
+                            try:
+                                # Iterate through the stream in real-time
+                                for chunk in response_stream:
+                                    collected_chunks.append(chunk)
+                                    content = chunk.choices[0].delta.content or ""
+                                    # Print each piece of text as it arrives
+                                    if content and not self.no_print:
+                                        print(content, end="", flush=True)
+                            finally:
+                                # --- FIX: Manually print a footer for clarity ---
+                                if not self.no_print:
+                                    print() # Ensures we move to a new line after the stream
+                                    print("--- End of Stream ---")
+                            
+                            # Reconstruct the full response object from all the chunks
+                            response_obj = stream_chunk_builder(collected_chunks)
+                        else:
+                            # If not streaming, just get the complete response object at once
+                            if img is not None:
+                                response_obj = self.call_llm(
+                                    task=task_prompt,
+                                    img=img,
+                                    *args,
+                                    **kwargs,
+                                )
+                            else:
+                                response_obj = self.call_llm(
+                                    task=task_prompt, *args, **kwargs
+                                )
+                        
+                        # From this point on, the logic is the same for both streaming and non-streaming
+                        
+                        # 1. Get the text-only part of the response for logging and memory
+                        response_text = self.parse_llm_output(response_obj)
+                        
+                        # 2. Add the text to the agent's short-term memory
                         self.short_memory.add(
                             role=self.agent_name,
-                            content=response,
+                            content=response_text,
                         )
 
-                        # Print
-                        self.pretty_print(response, loop_count)
+                        # 3. If we didn't stream, print the whole response now
+                        if not self.streaming_on:
+                             self.pretty_print(response_text, loop_count)
 
+                        # 4. Pass the COMPLETE response object (with tool data) to the tool executor
+                        if exists(self.tools):
+                            if (
+                                self.output_raw_json_from_tool_call
+                                is True
+                            ):
+                                tool_response_data = response_text
+                            else:
+                                tool_response_data = response_obj
+                            
+                            self.execute_tools(
+                                response=tool_response_data,
+                                loop_count=loop_count,
+                            )
                         # Check and execute callable tools
                         if exists(self.tools):
 
@@ -1087,12 +1143,15 @@ class Agent:
                                 self.output_raw_json_from_tool_call
                                 is True
                             ):
-                                response = response
+                                tool_response_data = response_text
                             else:
-                                self.execute_tools(
-                                    response=response,
-                                    loop_count=loop_count,
-                                )
+                                # The standard, robust way using the full object
+                                tool_response_data = response_obj
+                            
+                            self.execute_tools(
+                                response=tool_response_data,
+                                loop_count=loop_count,
+                            )
 
                         # Handle MCP tools
                         if (
diff --git a/swarms/utils/litellm_wrapper.py b/swarms/utils/litellm_wrapper.py
index 6aa5c7d3..6f303077 100644
--- a/swarms/utils/litellm_wrapper.py
+++ b/swarms/utils/litellm_wrapper.py
@@ -449,7 +449,12 @@ class LiteLLM:
             # Make the completion call
             response = completion(**completion_params)
 
-            # Handle tool-based response
+            # If streaming is enabled, return the response generator directly.
+            # The Agent class will handle iterating through it.
+            if self.stream:
+                return response
+
+            # If not streaming, process the complete response object as before.
             if self.tools_list_dictionary is not None:
                 return self.output_for_tools(response)
             elif self.return_all is True: