stream added

1 month ago · 638e5514cd
parent b5694e26ae
commit 638e5514cd
2 changed files with 92 additions and 28 deletions
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@ -1052,34 +1052,90 @@ class Agent:
                            )
                            self.memory_query(task_prompt)
                        # This variable will hold the full, structured response object from the LLM
                        response_obj = None
                        # Check if streaming is enabled for this run
                        if self.streaming_on:
                            # This utility is needed to rebuild the full response after streaming is complete
                            from litellm import stream_chunk_builder
                            # Call our wrapper, which will now return a stream/generator
                            if img is not None:
-                            response = self.call_llm(
+                                response_stream = self.call_llm(
                                    task=task_prompt, img=img, *args, **kwargs
                                )
                            else:
                                response_stream = self.call_llm(
                                    task=task_prompt, *args, **kwargs
                                )
                            # Prepare to collect the streaming chunks
                            collected_chunks = []
                            if not self.no_print:
                                # --- FIX: Manually print a header for clarity ---
                                print(f"--- Streaming Response from {self.agent_name} [Loop: {loop_count}] ---")
                                print(f"{self.agent_name}: ", end="")
                            try:
                                # Iterate through the stream in real-time
                                for chunk in response_stream:
                                    collected_chunks.append(chunk)
                                    content = chunk.choices[0].delta.content or ""
                                    # Print each piece of text as it arrives
                                    if content and not self.no_print:
                                        print(content, end="", flush=True)
                            finally:
                                # --- FIX: Manually print a footer for clarity ---
                                if not self.no_print:
                                    print() # Ensures we move to a new line after the stream
                                    print("--- End of Stream ---")
                            # Reconstruct the full response object from all the chunks
                            response_obj = stream_chunk_builder(collected_chunks)
                        else:
                            # If not streaming, just get the complete response object at once
                            if img is not None:
                                response_obj = self.call_llm(
                                    task=task_prompt,
                                    img=img,
                                    *args,
                                    **kwargs,
                                )
                            else:
-                            response = self.call_llm(
+                                response_obj = self.call_llm(
                                    task=task_prompt, *args, **kwargs
                                )
-                        # Parse the response from the agent with the output type
+                        # From this point on, the logic is the same for both streaming and non-streaming
                        if exists(self.tools_list_dictionary):
                            if isinstance(response, BaseModel):
                                response = response.model_dump()
-                        # Parse the response from the agent with the output type
+                        # 1. Get the text-only part of the response for logging and memory
-                        response = self.parse_llm_output(response)
+                        response_text = self.parse_llm_output(response_obj)
                        # 2. Add the text to the agent's short-term memory
                        self.short_memory.add(
                            role=self.agent_name,
-                            content=response,
+                            content=response_text,
                        )
-                        # Print
+                        # 3. If we didn't stream, print the whole response now
-                        self.pretty_print(response, loop_count)
+                        if not self.streaming_on:
                             self.pretty_print(response_text, loop_count)
                        # 4. Pass the COMPLETE response object (with tool data) to the tool executor
                        if exists(self.tools):
                            if (
                                self.output_raw_json_from_tool_call
                                is True
                            ):
                                tool_response_data = response_text
                            else:
                                tool_response_data = response_obj
                            self.execute_tools(
                                response=tool_response_data,
                                loop_count=loop_count,
                            )
                        # Check and execute callable tools
                        if exists(self.tools):
@ -1087,10 +1143,13 @@ class Agent:
                                self.output_raw_json_from_tool_call
                                is True
                            ):
-                                response = response
+                                tool_response_data = response_text
                            else:
                                # The standard, robust way using the full object
                                tool_response_data = response_obj
                            self.execute_tools(
-                                    response=response,
+                                response=tool_response_data,
                                loop_count=loop_count,
                            )
--- a/swarms/utils/litellm_wrapper.py
+++ b/swarms/utils/litellm_wrapper.py
@ -449,7 +449,12 @@ class LiteLLM:
            # Make the completion call
            response = completion(**completion_params)
-            # Handle tool-based response
+            # If streaming is enabled, return the response generator directly.
            # The Agent class will handle iterating through it.
            if self.stream:
                return response
            # If not streaming, process the complete response object as before.
            if self.tools_list_dictionary is not None:
                return self.output_for_tools(response)
            elif self.return_all is True: