Refactor streaming response handling to support print_on parameter for flexible output

pull/927/head
harshalmore31 5 days ago
parent 5e5819fc48
commit 92a9eac8d8

@@ -6,6 +6,7 @@ agent = Agent(
     model_name="gpt-4o-mini",
     streaming_on=True,  # 🔥 This enables real streaming!
     max_loops=1,
+    print_on=True,  # By default it's False (raw streaming)!
 )
 # This will now stream in real-time with beautiful UI!
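For reviewers who want to exercise both paths, a minimal sketch comparing the two modes (assuming the usual top-level import and that Agent accepts agent_name, which the diff reads back as self.agent_name; the task string is a placeholder):

from swarms import Agent

# Panel streaming: rendered through formatter.print_streaming_panel
panel_agent = Agent(
    agent_name="Panel-Agent",
    model_name="gpt-4o-mini",
    streaming_on=True,
    print_on=True,
    max_loops=1,
)

# Raw streaming: plain "AgentName: token token ..." written to stdout
raw_agent = Agent(
    agent_name="Raw-Agent",
    model_name="gpt-4o-mini",
    streaming_on=True,
    print_on=False,
    max_loops=1,
)

panel_agent.run("Give me three bullet points on streaming UX.")
raw_agent.run("Give me three bullet points on streaming UX.")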

@@ -2488,25 +2488,38 @@ class Agent:
                 # If we get a streaming response, handle it with the new streaming panel
                 if hasattr(streaming_response, '__iter__') and not isinstance(streaming_response, str):
-                    # Collect chunks for conversation saving
-                    collected_chunks = []
-
-                    def on_chunk_received(chunk: str):
-                        """Callback to collect chunks as they arrive"""
-                        collected_chunks.append(chunk)
-                        # Optional: Save each chunk to conversation in real-time
-                        # This creates a more detailed conversation history
-                        if self.verbose:
-                            logger.debug(f"Streaming chunk received: {chunk[:50]}...")
-
-                    # Use the streaming panel to display and collect the response
-                    complete_response = formatter.print_streaming_panel(
-                        streaming_response,
-                        title=f"🤖 {self.agent_name} Streaming Response",
-                        style="bold cyan",
-                        collect_chunks=True,
-                        on_chunk_callback=on_chunk_received
-                    )
+                    # Check print_on parameter for different streaming behaviors
+                    if self.print_on is False:
+                        # Show raw streaming text without formatting panels
+                        chunks = []
+                        print(f"\n{self.agent_name}: ", end="", flush=True)
+                        for chunk in streaming_response:
+                            if hasattr(chunk, 'choices') and chunk.choices[0].delta.content:
+                                content = chunk.choices[0].delta.content
+                                print(content, end="", flush=True)  # Print raw streaming text
+                                chunks.append(content)
+                        print()  # New line after streaming completes
+                        complete_response = ''.join(chunks)
+                    else:
+                        # Collect chunks for conversation saving
+                        collected_chunks = []
+
+                        def on_chunk_received(chunk: str):
+                            """Callback to collect chunks as they arrive"""
+                            collected_chunks.append(chunk)
+                            # Optional: Save each chunk to conversation in real-time
+                            # This creates a more detailed conversation history
+                            if self.verbose:
+                                logger.debug(f"Streaming chunk received: {chunk[:50]}...")
+
+                        # Use the streaming panel to display and collect the response
+                        complete_response = formatter.print_streaming_panel(
+                            streaming_response,
+                            title=f"🤖 {self.agent_name} Streaming Response",
+                            style="bold cyan",
+                            collect_chunks=True,
+                            on_chunk_callback=on_chunk_received
+                        )
 
                 # Restore original stream setting
                 self.llm.stream = original_stream
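The print_on=False branch assumes OpenAI/litellm-style chunks exposing choices[0].delta.content. A self-contained sketch of that loop against stub chunks; fake_stream below is hypothetical and stands in for streaming_response:

from types import SimpleNamespace

def fake_stream():
    # Stub for the OpenAI-style chunk objects the branch iterates over.
    for piece in ["Hello", ", ", "world", "!"]:
        delta = SimpleNamespace(content=piece)
        yield SimpleNamespace(choices=[SimpleNamespace(delta=delta)])

# Mirrors the raw branch: print each delta as it arrives, join at the end.
chunks = []
print("Raw-Agent: ", end="", flush=True)
for chunk in fake_stream():
    if hasattr(chunk, 'choices') and chunk.choices[0].delta.content:
        content = chunk.choices[0].delta.content
        print(content, end="", flush=True)
        chunks.append(content)
print()  # newline once the stream is exhausted
complete_response = ''.join(chunks)
assert complete_response == "Hello, world!"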
@@ -2744,12 +2757,10 @@ class Agent:
     def pretty_print(self, response: str, loop_count: int):
         if self.print_on is False:
             if self.streaming_on is True:
-                # self.stream_response(response)
-                formatter.print_panel_token_by_token(
-                    f"{self.agent_name}: {response}",
-                    title=f"Agent Name: {self.agent_name} [Max Loops: {loop_count}]",
-                )
-            elif self.print_on is True:
+                # Skip printing here since real streaming is handled in call_llm
+                # This avoids double printing when streaming_on=True
+                pass
+            elif self.no_print is True:
                 pass
             else:
                 # logger.info(f"Response: {response}")
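The gating above reduces to a small decision table; should_pretty_print below is a hypothetical standalone helper for illustration, not a method on the real class:

def should_pretty_print(print_on: bool, streaming_on: bool, no_print: bool) -> bool:
    # True means pretty_print should still render the response itself.
    if print_on is False:
        if streaming_on is True:
            return False  # call_llm already streamed the output live
        elif no_print is True:
            return False  # printing suppressed entirely
        else:
            return True   # falls through to the logging/printing path
    return True

assert should_pretty_print(False, True, False) is False   # no double printing
assert should_pretty_print(False, False, True) is False
assert should_pretty_print(False, False, False) is True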
