@@ -1419,36 +1419,19 @@ class Agent:

            # Make a final call to the LLM to summarize the tool results if tool_call_summary is enabled
            if self.tool_call_summary:
                temp_llm = self.temp_llm_instance_for_tool_summary()

-                final_summary_response = temp_llm.run(
-                    task=f"Please analyze and summarize the following tool execution output:\n\n{format_data_structure(tool_results)}"
-                )
-
-                # Handle streaming for final tool summary in real-time execution
-                if self.streaming_on and hasattr(final_summary_response, "__iter__") and not isinstance(final_summary_response, str):
-                    # Collect chunks for conversation saving
-                    collected_chunks = []
-
-                    def on_chunk_received(chunk: str):
-                        """Callback to collect chunks as they arrive"""
-                        collected_chunks.append(chunk)
-                        if self.verbose:
-                            logger.debug(f"Real-time tool summary streaming chunk: {chunk[:50]}...")
-
-                    # Use the streaming panel to display and collect the response
-                    complete_response = formatter.print_streaming_panel(
-                        final_summary_response,
-                        title=f"🤖 Agent: {self.agent_name} - Tool Summary (Real-time)",
-                        style="green",
-                        collect_chunks=True,
-                        on_chunk_callback=on_chunk_received,
-                    )
-
-                    final_summary_response = complete_response
-
-                elif self.streaming_on and isinstance(final_summary_response, str):
-                    # If streaming is on but we got a string response, display it streamed
-                    if self.print_on:
-                        self.stream_response(final_summary_response, delay=0.01)
+                # Use centralized streaming logic for real-time tool summary
+                if self.streaming_on:
+                    final_summary_response = temp_llm.run_tool_summary_with_streaming(
+                        tool_results=format_data_structure(tool_results),
+                        agent_name=f"{self.agent_name} - Real-time",
+                        print_on=self.print_on,
+                        verbose=self.verbose,
+                    )
+                else:
+                    final_summary_response = temp_llm.run(
+                        task=f"Please analyze and summarize the following tool execution output:\n\n{format_data_structure(tool_results)}"
+                    )

                response = self.parse_llm_output(final_summary_response)
                self.short_memory.add(role=self.agent_name, content=response)
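The new branch above delegates to run_tool_summary_with_streaming, a wrapper method whose body is not part of this diff. A minimal sketch of the expected shape, assuming it only rebuilds the same summary prompt as the non-streaming branch and hands it to the wrapper's generic streaming runner (modeled here as a stream_runner callable; in swarms.utils.litellm_wrapper this is presumably self.run_with_streaming, sketched after the call_llm hunk further down):

    # Hypothetical sketch only; the real method lives in swarms.utils.litellm_wrapper
    # and may differ in signature and detail.
    from typing import Callable

    def run_tool_summary_with_streaming(
        stream_runner: Callable[..., str],  # stand-in for the wrapper's generic streaming runner
        tool_results: str,
        agent_name: str = "Agent",
        print_on: bool = True,
        verbose: bool = False,
    ) -> str:
        # Reuse the exact prompt the non-streaming branch builds, then route it
        # through one shared streaming code path instead of per-call-site handling.
        task = (
            "Please analyze and summarize the following tool execution output:\n\n"
            f"{tool_results}"
        )
        return stream_runner(
            task=task,
            title=f"🤖 Agent: {agent_name} - Tool Summary",
            print_on=print_on,
            verbose=verbose,
        )

The call sites in this hunk and in the tool-summary hunks below pass print_on and verbose straight through, so display and logging policy stays with the Agent while chunk handling moves into the wrapper.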
@@ -2467,11 +2450,11 @@ class Agent:
        self, response: str, delay: float = 0.001
    ) -> None:
        """
-        Streams the response token by token.
+        Streams the response token by token using centralized wrapper logic.

        Args:
            response (str): The response text to be streamed.
-            delay (float, optional): Delay in seconds between printing each token. Default is 0.1 seconds.
+            delay (float, optional): Delay in seconds between printing each token. Default is 0.001 seconds.

        Raises:
            ValueError: If the response is not provided.
@@ -2479,18 +2462,26 @@ class Agent:
        Example:
            response = "This is a sample response from the API."
-            stream_response(response)
+            agent.stream_response(response)
        """
        # Check for required inputs
        if not response:
            raise ValueError("Response is required.")

        try:
-            # Stream and print the response token by token
-            for token in response.split():
-                print(token, end=" ", flush=True)
-                time.sleep(delay)
-            print()  # Ensure a newline after streaming
+            # Use centralized string streaming from wrapper
+            if hasattr(self.llm, "handle_string_streaming"):
+                self.llm.handle_string_streaming(
+                    response=response,
+                    print_on=self.print_on,
+                    delay=delay,
+                )
+            else:
+                # Fallback to original implementation if wrapper doesn't support it
+                for token in response.split():
+                    print(token, end=" ", flush=True)
+                    time.sleep(delay)
+                print()  # Ensure a newline after streaming
        except Exception as e:
            print(f"An error occurred during streaming: {e}")
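stream_response now prefers a handle_string_streaming hook on the wrapper and keeps the token loop only as a fallback. A minimal sketch of that hook, inferred from the fallback branch above (written as a free function here; in the wrapper it is presumably a method on LiteLLM and may differ):

    # Sketch inferred from the fallback branch; the real wrapper method may differ.
    import time

    def handle_string_streaming(
        response: str,
        print_on: bool = True,
        delay: float = 0.001,
    ) -> None:
        # Print an already-complete string token by token to simulate streaming.
        if not response or not print_on:
            return
        for token in response.split():
            print(token, end=" ", flush=True)
            time.sleep(delay)
        print()  # newline after the last token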
@@ -2800,95 +2791,24 @@ class Agent:
                del kwargs["is_last"]

        try:
-            # Set streaming parameter in LLM if streaming is enabled
-            if self.streaming_on and hasattr(self.llm, "stream"):
-                original_stream = self.llm.stream
-                self.llm.stream = True
-
-                if img is not None:
-                    streaming_response = self.llm.run(
-                        task=task, img=img, *args, **kwargs
-                    )
-                else:
-                    streaming_response = self.llm.run(
-                        task=task, *args, **kwargs
-                    )
-
-                # If we get a streaming response, handle it with the new streaming panel
-                if hasattr(
-                    streaming_response, "__iter__"
-                ) and not isinstance(streaming_response, str):
-                    # Check if streaming_callback is provided (for ConcurrentWorkflow dashboard integration)
-                    if streaming_callback is not None:
-                        # Real-time callback streaming for dashboard integration
-                        chunks = []
-                        for chunk in streaming_response:
-                            if (
-                                hasattr(chunk, "choices")
-                                and chunk.choices[0].delta.content
-                            ):
-                                content = chunk.choices[
-                                    0
-                                ].delta.content
-                                chunks.append(content)
-                                # Call the streaming callback with the new chunk
-                                streaming_callback(content)
-                        complete_response = "".join(chunks)
-                    # Check print_on parameter for different streaming behaviors
-                    elif self.print_on is False:
-                        # Silent streaming - no printing, just collect chunks
-                        chunks = []
-                        for chunk in streaming_response:
-                            if (
-                                hasattr(chunk, "choices")
-                                and chunk.choices[0].delta.content
-                            ):
-                                content = chunk.choices[
-                                    0
-                                ].delta.content
-                                chunks.append(content)
-                        complete_response = "".join(chunks)
-                    else:
-                        # Collect chunks for conversation saving
-                        collected_chunks = []
-
-                        def on_chunk_received(chunk: str):
-                            """Callback to collect chunks as they arrive"""
-                            collected_chunks.append(chunk)
-                            # Optional: Save each chunk to conversation in real-time
-                            # This creates a more detailed conversation history
-                            if self.verbose:
-                                logger.debug(
-                                    f"Streaming chunk received: {chunk[:50]}..."
-                                )
-
-                        # Use the streaming panel to display and collect the response
-                        complete_response = formatter.print_streaming_panel(
-                            streaming_response,
-                            title=f"🤖 Agent: {self.agent_name} Loops: {current_loop}",
-                            style=None,  # Use random color like non-streaming approach
-                            collect_chunks=True,
-                            on_chunk_callback=on_chunk_received,
-                        )
-
-                    # Restore original stream setting
-                    self.llm.stream = original_stream
-
-                    # Return the complete response for further processing
-                    return complete_response
-                else:
-                    # Restore original stream setting
-                    self.llm.stream = original_stream
-                    return streaming_response
+            # Use centralized streaming logic from wrapper if streaming is enabled
+            if self.streaming_on and hasattr(self.llm, "run_with_streaming"):
+                return self.llm.run_with_streaming(
+                    task=task,
+                    img=img,
+                    streaming_callback=streaming_callback,
+                    title=f"🤖 Agent: {self.agent_name} Loops: {current_loop}",
+                    print_on=self.print_on,
+                    verbose=self.verbose,
+                    *args,
+                    **kwargs,
+                )
            else:
                # Non-streaming call
                if img is not None:
-                    out = self.llm.run(
-                        task=task, img=img, *args, **kwargs
-                    )
+                    out = self.llm.run(task=task, img=img, *args, **kwargs)
                else:
                    out = self.llm.run(task=task, *args, **kwargs)

                return out

        except AgentLLMError as e:
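The block removed from call_llm is essentially what the new run_with_streaming wrapper method is expected to centralize: iterate the OpenAI-style chunks, collect the delta content, and either forward each chunk to streaming_callback (the ConcurrentWorkflow dashboard path), stay silent, or render it. A standalone sketch of that core loop, assuming litellm's streaming chunk format as used in the removed code; the real wrapper method also drives formatter.print_streaming_panel and accepts img/title/style/verbose, which are omitted here:

    # Minimal sketch of a centralized streaming runner, inferred from the removed
    # block above; the real LiteLLM.run_with_streaming may differ in detail.
    from typing import Callable, List, Optional

    import litellm

    def run_with_streaming(
        model_name: str,
        task: str,
        streaming_callback: Optional[Callable[[str], None]] = None,
        print_on: bool = True,
    ) -> str:
        """Stream a completion and return the fully assembled response text."""
        stream = litellm.completion(
            model=model_name,
            messages=[{"role": "user", "content": task}],
            stream=True,
        )
        chunks: List[str] = []
        for chunk in stream:
            # Same OpenAI-style delta access the removed Agent code used.
            if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                chunks.append(content)
                if streaming_callback is not None:
                    # Real-time dispatch, e.g. for ConcurrentWorkflow dashboards.
                    streaming_callback(content)
                elif print_on:
                    print(content, end="", flush=True)
        if print_on and streaming_callback is None:
            print()  # newline after the streamed output
        return "".join(chunks)

Returning the joined string keeps the caller's contract identical to the non-streaming path, which is why call_llm can simply return the wrapper's result.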
@@ -3259,36 +3179,17 @@ class Agent:
        try:
            temp_llm = self.temp_llm_instance_for_tool_summary()

-            summary = temp_llm.run(
-                task=self.short_memory.get_str()
-            )
-
-            # Handle streaming MCP tool summary response
-            if self.streaming_on and hasattr(summary, "__iter__") and not isinstance(summary, str):
-                # Collect chunks for conversation saving
-                collected_chunks = []
-
-                def on_chunk_received(chunk: str):
-                    """Callback to collect chunks as they arrive"""
-                    collected_chunks.append(chunk)
-                    if self.verbose:
-                        logger.debug(f"MCP tool summary streaming chunk received: {chunk[:50]}...")
-
-                # Use the streaming panel to display and collect the response
-                complete_response = formatter.print_streaming_panel(
-                    summary,
-                    title=f"🤖 Agent: {self.agent_name} - MCP Tool Summary",
-                    style="cyan",
-                    collect_chunks=True,
-                    on_chunk_callback=on_chunk_received,
-                )
-
-                summary = complete_response
-
-            elif self.streaming_on and isinstance(summary, str):
-                # If streaming is on but we got a string response, display it streamed
-                if self.print_on:
-                    self.stream_response(summary, delay=0.01)
+            # Use centralized streaming logic for MCP tool summary
+            if self.streaming_on:
+                summary = temp_llm.run_with_streaming(
+                    task=self.short_memory.get_str(),
+                    title=f"🤖 Agent: {self.agent_name} - MCP Tool Summary",
+                    style="cyan",
+                    print_on=self.print_on,
+                    verbose=self.verbose,
+                )
+            else:
+                summary = temp_llm.run(task=self.short_memory.get_str())

        except Exception as e:
            logger.error(
@@ -3310,6 +3211,7 @@ class Agent:
            raise e

    def temp_llm_instance_for_tool_summary(self):
+        from swarms.utils.litellm_wrapper import LiteLLM
        return LiteLLM(
            model_name=self.model_name,
            temperature=self.temperature,
@@ -3373,34 +3275,16 @@ class Agent:
        {output}
        """

-        tool_response = temp_llm.run(tool_summary_prompt)
-
-        # Handle streaming tool response
-        if self.streaming_on and hasattr(tool_response, "__iter__") and not isinstance(tool_response, str):
-            # Collect chunks for conversation saving
-            collected_chunks = []
-
-            def on_chunk_received(chunk: str):
-                """Callback to collect chunks as they arrive"""
-                collected_chunks.append(chunk)
-                if self.verbose:
-                    logger.debug(f"Tool response streaming chunk received: {chunk[:50]}...")
-
-            # Use the streaming panel to display and collect the response
-            complete_response = formatter.print_streaming_panel(
-                tool_response,
-                title=f"🤖 Agent: {self.agent_name} - Tool Summary",
-                style="blue",
-                collect_chunks=True,
-                on_chunk_callback=on_chunk_received,
-            )
-
-            tool_response = complete_response
-
-        elif self.streaming_on and isinstance(tool_response, str):
-            # If streaming is on but we got a string response, display it streamed
-            if self.print_on:
-                self.stream_response(tool_response, delay=0.01)
+        # Use centralized streaming logic for tool summary
+        if self.streaming_on:
+            tool_response = temp_llm.run_tool_summary_with_streaming(
+                tool_results=str(output),
+                agent_name=self.agent_name,
+                print_on=self.print_on,
+                verbose=self.verbose,
+            )
+        else:
+            tool_response = temp_llm.run(tool_summary_prompt)

        # Add the tool response to memory
        self.short_memory.add(
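Taken together, these hunks route every streaming path in Agent through the LiteLLM wrapper. An illustrative way to exercise the new paths end to end; the constructor flags mirror the attributes referenced in the hunks (streaming_on, print_on, tool_call_summary, verbose), while the model name and the toy tool are placeholders:

    # Illustrative usage sketch; flag names follow the attributes used above,
    # but the exact constructor surface should be checked against the Agent class.
    from swarms import Agent

    def get_weather(city: str) -> str:
        """Toy tool so the tool-call summary path has something to summarize."""
        return f"The weather in {city} is sunny and 24 degrees C."

    agent = Agent(
        agent_name="Streaming-Demo-Agent",
        model_name="gpt-4o-mini",       # placeholder model
        max_loops=1,
        streaming_on=True,              # routes LLM calls through run_with_streaming
        print_on=True,                  # render the streamed output
        tool_call_summary=True,         # exercises run_tool_summary_with_streaming
        verbose=False,
        tools=[get_weather],
    )

    out = agent.run("What is the weather in Paris? Use the get_weather tool.")
    print(out)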