pull/938/head
harshalmore31 1 day ago
parent 9ca24fd545
commit beac03c67d

@@ -1360,9 +1360,39 @@ class Agent:
            final_summary_response = temp_llm.run(
                task=f"Please analyze and summarize the following tool execution output:\n\n{format_data_structure(tool_results)}"
            )
+
+            # Handle streaming for final tool summary in real-time execution
+            if self.streaming_on and hasattr(final_summary_response, "__iter__") and not isinstance(final_summary_response, str):
+                # Collect chunks for conversation saving
+                collected_chunks = []
+
+                def on_chunk_received(chunk: str):
+                    """Callback to collect chunks as they arrive"""
+                    collected_chunks.append(chunk)
+                    if self.verbose:
+                        logger.debug(f"Real-time tool summary streaming chunk: {chunk[:50]}...")
+
+                # Use the streaming panel to display and collect the response
+                complete_response = formatter.print_streaming_panel(
+                    final_summary_response,
+                    title=f"🤖 Agent: {self.agent_name} - Tool Summary (Real-time)",
+                    style="green",
+                    collect_chunks=True,
+                    on_chunk_callback=on_chunk_received,
+                )
+                final_summary_response = complete_response
+            elif self.streaming_on and isinstance(final_summary_response, str):
+                # If streaming is on but we got a string response, display it streamed
+                if self.print_on:
+                    self.stream_response(final_summary_response, delay=0.01)
+
            response = self.parse_llm_output(final_summary_response)
            self.short_memory.add(role=self.agent_name, content=response)
-            if self.print_on:
+
+            # Only pretty_print if streaming is off (to avoid double printing)
+            if self.print_on and not self.streaming_on:
                self.pretty_print(response, loop_count)
        else:
            response = f"Tool execution completed: {format_data_structure(tool_results)}"
@@ -3169,6 +3199,34 @@ class Agent:
                summary = temp_llm.run(
                    task=self.short_memory.get_str()
                )
+
+                # Handle streaming MCP tool summary response
+                if self.streaming_on and hasattr(summary, "__iter__") and not isinstance(summary, str):
+                    # Collect chunks for conversation saving
+                    collected_chunks = []
+
+                    def on_chunk_received(chunk: str):
+                        """Callback to collect chunks as they arrive"""
+                        collected_chunks.append(chunk)
+                        if self.verbose:
+                            logger.debug(f"MCP tool summary streaming chunk received: {chunk[:50]}...")
+
+                    # Use the streaming panel to display and collect the response
+                    complete_response = formatter.print_streaming_panel(
+                        summary,
+                        title=f"🤖 Agent: {self.agent_name} - MCP Tool Summary",
+                        style="cyan",
+                        collect_chunks=True,
+                        on_chunk_callback=on_chunk_received,
+                    )
+                    summary = complete_response
+                elif self.streaming_on and isinstance(summary, str):
+                    # If streaming is on but we got a string response, display it streamed
+                    if self.print_on:
+                        self.stream_response(summary, delay=0.01)
            except Exception as e:
                logger.error(
                    f"Error calling LLM after MCP tool execution: {e}"
@@ -3176,7 +3234,8 @@ class Agent:
                # Fallback: provide a default summary
                summary = "I successfully executed the MCP tool and retrieved the information above."

-               if self.print_on is True:
+               # Only pretty_print if streaming is off (to avoid double printing)
+               if self.print_on and not self.streaming_on:
                    self.pretty_print(summary, loop_count=current_loop)

            # Add to the memory
@@ -3193,7 +3252,7 @@ class Agent:
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            system_prompt=self.system_prompt,
-           stream=False,  # Always disable streaming for tool summaries
+           stream=self.streaming_on,
            tools_list_dictionary=None,
            parallel_tool_calls=False,
            base_url=self.llm_base_url,
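Note: changing `stream=False` to `stream=self.streaming_on` means the temporary summary LLM inherits the agent's streaming flag, so its `run()` may now hand back a generator instead of a string, which is exactly what the iterable checks in the surrounding hunks guard against. A toy illustration (the class and its parameters are stand-ins for the real wrapper):

```python
from typing import Iterator, Union


class TempSummaryLLM:
    # Toy stand-in; the real temp LLM instance takes many more parameters
    # (temperature, max_tokens, system_prompt, base_url, ...).
    def __init__(self, stream: bool):
        self.stream = stream

    def run(self, task: str) -> Union[str, Iterator[str]]:
        words = f"Summary of: {task}".split()
        if self.stream:
            return (word + " " for word in words)  # generator, like stream=True
        return " ".join(words)                     # plain string, like stream=False


streaming_on = True
temp_llm = TempSummaryLLM(stream=streaming_on)  # mirrors stream=self.streaming_on
out = temp_llm.run("tool output ...")
print(out if isinstance(out, str) else "".join(out))
```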
@@ -3239,26 +3298,55 @@ class Agent:
        if self.tool_call_summary is True:
            temp_llm = self.temp_llm_instance_for_tool_summary()

-           tool_response = temp_llm.run(
-               f"""
+           tool_summary_prompt = f"""
            Please analyze and summarize the following tool execution output in a clear and concise way.
            Focus on the key information and insights that would be most relevant to the user's original request.
            If there are any errors or issues, highlight them prominently.

            The user's original request was:
            {self.task}

            Tool Output:
            {output}
            """
-           )
+
+           tool_response = temp_llm.run(tool_summary_prompt)
+
+           # Handle streaming tool response
+           if self.streaming_on and hasattr(tool_response, "__iter__") and not isinstance(tool_response, str):
+               # Collect chunks for conversation saving
+               collected_chunks = []
+
+               def on_chunk_received(chunk: str):
+                   """Callback to collect chunks as they arrive"""
+                   collected_chunks.append(chunk)
+                   if self.verbose:
+                       logger.debug(f"Tool response streaming chunk received: {chunk[:50]}...")
+
+               # Use the streaming panel to display and collect the response
+               complete_response = formatter.print_streaming_panel(
+                   tool_response,
+                   title=f"🤖 Agent: {self.agent_name} - Tool Summary",
+                   style="blue",
+                   collect_chunks=True,
+                   on_chunk_callback=on_chunk_received,
+               )
+               tool_response = complete_response
+           elif self.streaming_on and isinstance(tool_response, str):
+               # If streaming is on but we got a string response, display it streamed
+               if self.print_on:
+                   self.stream_response(tool_response, delay=0.01)
+
+           # Add the tool response to memory
            self.short_memory.add(
                role=self.agent_name,
                content=tool_response,
            )

-           if self.print_on is True:
+           # Only pretty_print if streaming is off (to avoid double printing)
+           if self.print_on and not self.streaming_on:
                self.pretty_print(
                    tool_response,
                    loop_count,

@@ -153,6 +153,134 @@ class LiteLLM:
        litellm.drop_params = True

+    def _collect_streaming_response(self, streaming_response):
+        """
+        Parse and yield individual content chunks from a streaming response.
+
+        Args:
+            streaming_response: The streaming response object from litellm
+
+        Yields:
+            str: Individual content chunks as they arrive
+        """
+        try:
+            for chunk in streaming_response:
+                content = None
+
+                # Handle different chunk formats
+                if hasattr(chunk, 'choices') and chunk.choices:
+                    choice = chunk.choices[0]
+
+                    # OpenAI-style chunks
+                    if hasattr(choice, 'delta') and choice.delta:
+                        if hasattr(choice.delta, 'content') and choice.delta.content:
+                            content = choice.delta.content
+
+                    # Alternative chunk format
+                    elif hasattr(choice, 'message') and choice.message:
+                        if hasattr(choice.message, 'content') and choice.message.content:
+                            content = choice.message.content
+
+                # Anthropic-style chunks
+                elif hasattr(chunk, 'type'):
+                    if chunk.type == 'content_block_delta' and hasattr(chunk, 'delta'):
+                        if chunk.delta.type == 'text_delta':
+                            content = chunk.delta.text
+
+                # Handle direct content chunks
+                elif hasattr(chunk, 'content'):
+                    content = chunk.content
+
+                # Yield content chunk if we found any
+                if content:
+                    yield content
+
+        except Exception as e:
+            logger.error(f"Error parsing streaming chunks: {e}")
+            return
+    async def _collect_streaming_response_async(self, streaming_response):
+        """
+        Parse and yield individual content chunks from an async streaming response.
+
+        Args:
+            streaming_response: The async streaming response object from litellm
+
+        Yields:
+            str: Individual content chunks as they arrive
+        """
+        try:
+            async for chunk in streaming_response:
+                content = None
+
+                # Handle different chunk formats
+                if hasattr(chunk, 'choices') and chunk.choices:
+                    choice = chunk.choices[0]
+
+                    # OpenAI-style chunks
+                    if hasattr(choice, 'delta') and choice.delta:
+                        if hasattr(choice.delta, 'content') and choice.delta.content:
+                            content = choice.delta.content
+
+                    # Alternative chunk format
+                    elif hasattr(choice, 'message') and choice.message:
+                        if hasattr(choice.message, 'content') and choice.message.content:
+                            content = choice.message.content
+
+                # Anthropic-style chunks
+                elif hasattr(chunk, 'type'):
+                    if chunk.type == 'content_block_delta' and hasattr(chunk, 'delta'):
+                        if chunk.delta.type == 'text_delta':
+                            content = chunk.delta.text
+
+                # Handle direct content chunks
+                elif hasattr(chunk, 'content'):
+                    content = chunk.content
+
+                # Yield content chunk if we found any
+                if content:
+                    yield content
+
+        except Exception as e:
+            logger.error(f"Error parsing async streaming chunks: {e}")
+            return
+
+    def collect_all_chunks(self, streaming_response):
+        """
+        Helper method to collect all chunks from a streaming response into a complete text.
+        This provides backward compatibility for code that expects a complete response.
+
+        Args:
+            streaming_response: The streaming response object from litellm
+
+        Returns:
+            str: The complete response text collected from all chunks
+        """
+        chunks = []
+        for chunk in self._collect_streaming_response(streaming_response):
+            chunks.append(chunk)
+        complete_response = "".join(chunks)
+        logger.info(f"Collected complete streaming response: {len(complete_response)} characters")
+        return complete_response
+
+    async def collect_all_chunks_async(self, streaming_response):
+        """
+        Helper method to collect all chunks from an async streaming response into a complete text.
+        This provides backward compatibility for code that expects a complete response.
+
+        Args:
+            streaming_response: The async streaming response object from litellm
+
+        Returns:
+            str: The complete response text collected from all chunks
+        """
+        chunks = []
+        async for chunk in self._collect_streaming_response_async(streaming_response):
+            chunks.append(chunk)
+        complete_response = "".join(chunks)
+        logger.info(f"Collected complete async streaming response: {len(complete_response)} characters")
+        return complete_response
    def output_for_tools(self, response: any):
        if self.mcp_call is True:
            out = response.choices[0].message.tool_calls[0].function
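Reviewer sketch: a self-contained check of the OpenAI-style branch of the parser added above. The mock chunks below are hypothetical `SimpleNamespace` stand-ins shaped like `chunk.choices[0].delta.content`; real litellm chunk objects carry more fields, but this is the path the helper walks for them.

```python
from types import SimpleNamespace


def extract_content(chunk):
    # Mirrors the field checks in _collect_streaming_response for the
    # OpenAI-style case: chunk.choices[0].delta.content.
    if hasattr(chunk, "choices") and chunk.choices:
        delta = getattr(chunk.choices[0], "delta", None)
        if delta is not None and getattr(delta, "content", None):
            return delta.content
    return None


# Hypothetical chunks shaped like litellm/OpenAI streaming deltas.
mock_chunks = [
    SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content="Hel"))]),
    SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content="lo"))]),
    SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content=None))]),  # empty delta is skipped
]

pieces = [piece for piece in map(extract_content, mock_chunks) if piece]
assert "".join(pieces) == "Hello"
```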
@@ -471,7 +599,9 @@ class LiteLLM:
            **kwargs: Additional keyword arguments.

        Returns:
-           str: The content of the response from the model.
+           str or generator: When streaming is disabled, returns the complete response content.
+               When streaming is enabled, returns a generator that yields content chunks.
+               Use collect_all_chunks() to get complete response from the generator.

        Raises:
            Exception: If there is an error in processing the request.
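Note on the "use collect_all_chunks() to get complete response" guidance: the stream behind it is a one-shot iterator, so a caller should either iterate `_collect_streaming_response(...)` incrementally or drain the stream with `collect_all_chunks(...)`, not both. A minimal illustration with a stand-in generator:

```python
def chunk_stream():
    # Stand-in for a generator of parsed text chunks.
    yield from ["alpha ", "beta ", "gamma"]


stream = chunk_stream()
first_pass = "".join(stream)   # consumes the generator, like collect_all_chunks()
second_pass = "".join(stream)  # already exhausted: nothing left to read

assert first_pass == "alpha beta gamma"
assert second_pass == ""
```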
@@ -525,7 +655,7 @@ class LiteLLM:
            # Handle streaming response
            if self.stream:
-               return response  # Return the streaming generator directly
+               return response

            # Handle tool-based response
            elif self.tools_list_dictionary is not None:
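Review note: a hedged usage sketch of the synchronous path after this change. The import path and constructor arguments below are assumptions about the swarms wrapper rather than verified facts; the flow itself (raw streaming object returned from `run()`, drained by `collect_all_chunks()`) matches this hunk.

```python
# Sketch only: module path and constructor arguments are assumptions,
# not verified against the repository.
from swarms.utils.litellm_wrapper import LiteLLM  # assumed import path

llm = LiteLLM(model_name="gpt-4o-mini", stream=True)  # assumed parameter names

streaming_response = llm.run(task="Summarize the tool output in two sentences.")

# With stream=True, run() returns litellm's streaming object unchanged;
# collect_all_chunks() drains it into a single string when needed.
if hasattr(streaming_response, "__iter__") and not isinstance(streaming_response, str):
    text = llm.collect_all_chunks(streaming_response)
else:
    text = streaming_response

print(text)
```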
@@ -574,7 +704,9 @@ class LiteLLM:
            **kwargs: Additional keyword arguments.

        Returns:
-           str: The content of the response from the model.
+           str or async generator: When streaming is disabled, returns the complete response content.
+               When streaming is enabled, returns an async generator that yields content chunks.
+               Use collect_all_chunks_async() to get complete response from the generator.
        """
        try:
            messages = self._prepare_messages(task)
@@ -608,6 +740,10 @@ class LiteLLM:
            # Standard completion
            response = await acompletion(**completion_params)

+           # Handle streaming response for async
+           if self.stream:
+               return self._collect_streaming_response_async(response)
+
            print(response)

            return response
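Review note: the async path ends up asymmetric with `run()`. Here the method (assumed to be `arun()`) already wraps litellm's stream in `_collect_streaming_response_async()`, so the caller receives parsed text chunks and should iterate them directly rather than passing them back through `collect_all_chunks_async()`. A hedged sketch under the same assumptions as above:

```python
# Sketch only: import path, constructor arguments, and the arun() name are
# assumptions, not verified against the repository.
import asyncio

from swarms.utils.litellm_wrapper import LiteLLM  # assumed import path


async def main() -> None:
    llm = LiteLLM(model_name="gpt-4o-mini", stream=True)  # assumed parameter names

    # arun() returns an async generator of already-parsed text chunks,
    # so iterate it directly instead of re-wrapping it.
    chunk_gen = await llm.arun(task="Summarize the tool output in two sentences.")
    pieces = [chunk async for chunk in chunk_gen]
    print("".join(pieces))


asyncio.run(main())
```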
