Refactor Agent class to simplify tool call handling and remove unnecessary streaming coordination flags

pull/938/head
harshalmore31 1 week ago
parent a7b783f717
commit a528139cc4

@@ -573,13 +573,6 @@ class Agent:
         )
         self.summarize_multiple_images = summarize_multiple_images
         self.tool_retry_attempts = tool_retry_attempts
-        # Streaming / tool-call coordination flags
-        # When a tool call is expected we temporarily disable streaming so the
-        # LLM returns a complete JSON payload that can be parsed reliably. After
-        # the first tool call has been executed we re-enable streaming for
-        # subsequent requests / summaries.
-        self.expecting_tool_call: bool = False
-        self.tool_call_completed: bool = False
         self.speed_mode = speed_mode

         # Initialize the feedback
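
With the coordination flags gone, whether a request streams is controlled by the single `streaming_on` setting. A minimal usage sketch under that assumption (`my_tool` is a hypothetical tool, not part of this diff):

    from swarms import Agent

    def my_tool(query: str) -> str:
        # Hypothetical tool, used only for illustration
        return f"results for {query}"

    agent = Agent(
        agent_name="demo-agent",
        model_name="gpt-4o-mini",
        streaming_on=True,  # the one remaining streaming switch
        tools=[my_tool],
    )
    agent.run("Look something up with the tool")
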
@@ -1024,9 +1017,6 @@ class Agent:
             agent(task="What is the capital of France?", img="path/to/image.jpg", is_last=True)
         """
         try:
-            # Preserve the original user task so that tool summaries can reference it
-            if task is not None:
-                self.run_task = str(task)
             self.check_if_no_prompt_then_autogenerate(task)
@ -1076,13 +1066,6 @@ class Agent:
self.short_memory.return_history_as_string() self.short_memory.return_history_as_string()
) )
# Determine if this request is primarily to obtain the first tool call
if self.streaming_on and exists(self.tools) and not self.tool_call_completed:
# Disable streaming for this request so we can reliably parse JSON
self.expecting_tool_call = True
else:
self.expecting_tool_call = False
# Parameters # Parameters
attempt = 0 attempt = 0
success = False success = False
@@ -1134,15 +1117,10 @@ class Agent:
                     # Check and execute callable tools
                     if exists(self.tools):
-                        # Use standard tool execution for both streaming and non-streaming
                         self.tool_execution_retry(
                             response, loop_count
                         )
-                        # Mark that at least one tool call has been processed
-                        self.tool_call_completed = True
-                        # Reset expecting_tool_call so subsequent requests can stream
-                        self.expecting_tool_call = False

                     # Handle MCP tools
                     if (
                         exists(self.mcp_url)
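
`tool_execution_retry` itself is unchanged here; given the `tool_retry_attempts` attribute kept in the first hunk, it presumably retries failed tool executions. A rough sketch of that retry pattern (assumed semantics, not the actual method body):

    # Assumed shape of the retry wrapper, for orientation only
    def tool_execution_retry(self, response, loop_count):
        for attempt in range(self.tool_retry_attempts):
            try:
                self.execute_tools(response=response, loop_count=loop_count)
                return
            except Exception:
                if attempt == self.tool_retry_attempts - 1:
                    raise
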
@ -2552,10 +2530,8 @@ class Agent:
del kwargs["is_last"] del kwargs["is_last"]
try: try:
# Decide whether streaming should be used for this call # Set streaming parameter in LLM if streaming is enabled
streaming_enabled = self.streaming_on and not getattr(self, "expecting_tool_call", False) if self.streaming_on and hasattr(self.llm, "stream"):
# Set streaming parameter in LLM if streaming is enabled for this call
if streaming_enabled and hasattr(self.llm, "stream"):
original_stream = self.llm.stream original_stream = self.llm.stream
self.llm.stream = True self.llm.stream = True
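
The surviving lines save the LLM's `stream` attribute before forcing it on for the call; the matching restore sits outside the visible hunk. The full save/set/restore pattern, sketched (restore step assumed):

    # Temporarily force streaming for one call, then put the flag back
    original_stream = self.llm.stream
    self.llm.stream = True
    try:
        out = self.llm.run(task, *args, **kwargs)
    finally:
        self.llm.stream = original_stream  # assumed restore, not shown in the hunk
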
@ -3004,18 +2980,12 @@ class Agent:
raise e raise e
def temp_llm_instance_for_tool_summary(self): def temp_llm_instance_for_tool_summary(self):
"""Create a temporary LiteLLM instance for the post-tool summary.
If the agent was configured with `streaming_on=True`, the summary
request will also stream; otherwise it will be a normal synchronous
call. No extra coordination flags are required.
"""
return LiteLLM( return LiteLLM(
model_name=self.model_name, model_name=self.model_name,
temperature=self.temperature, temperature=self.temperature,
max_tokens=self.max_tokens, max_tokens=self.max_tokens,
system_prompt=self.system_prompt, system_prompt=self.system_prompt,
stream=self.streaming_on, stream=False, # Always disable streaming for tool summaries
tools_list_dictionary=None, tools_list_dictionary=None,
parallel_tool_calls=False, parallel_tool_calls=False,
base_url=self.llm_base_url, base_url=self.llm_base_url,
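
Pinning `stream=False` guarantees that `temp_llm.run(...)` returns a complete string rather than a chunk generator, which is what the simplified summary path below depends on. Sketch of the expected call-site behavior (the `assert` is illustrative):

    temp_llm = self.temp_llm_instance_for_tool_summary()
    summary = temp_llm.run("Summarize this tool output: ...")
    assert isinstance(summary, str)  # assumed: non-streaming runs yield text, not chunks
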
@@ -3062,42 +3032,28 @@ class Agent:
         if self.tool_call_summary is True:
             temp_llm = self.temp_llm_instance_for_tool_summary()

-            tool_prompt = f"""
+            tool_response = temp_llm.run(
+                f"""
             Please analyze and summarize the following tool execution output in a clear and concise way.
             Focus on the key information and insights that would be most relevant to the user's original request.

-            {self.run_task}
-
             If there are any errors or issues, highlight them prominently.

             Tool Output:
             {output}
             """
-
-            # Stream the tool summary only if the agent is configured for streaming
-            if self.streaming_on and self.print_on:
-                # Handle streaming response with streaming panel
-                streaming_response = temp_llm.run(tool_prompt)
-                if hasattr(streaming_response, "__iter__") and not isinstance(streaming_response, str):
-                    # Use streaming panel directly
-                    tool_response = formatter.print_streaming_panel(
-                        streaming_response,
-                        title=f"🤖 Agent: {self.agent_name} Tool Summary",
-                        style=None,
-                        collect_chunks=True,
             )
-                else:
-                    # Fallback for non-streaming response
-                    tool_response = streaming_response
-                self.pretty_print(tool_response, loop_count)
-            else:
-                # Non-streaming response
-                tool_response = temp_llm.run(tool_prompt)
-                if self.print_on:
-                    self.pretty_print(tool_response, loop_count)

             self.short_memory.add(
                 role=self.agent_name,
                 content=tool_response,
             )
+
+            if self.print_on is True:
+                self.pretty_print(
+                    tool_response,
+                    loop_count,
+                )

     def list_output_types(self):
         return OutputType
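
Read linearly, the post-tool summary path after this commit reduces to a single synchronous sequence. A condensed restatement of the new code (names taken from the diff; error handling omitted):

    if self.tool_call_summary is True:
        temp_llm = self.temp_llm_instance_for_tool_summary()  # stream=False inside
        tool_response = temp_llm.run(f"...analyze...\n\nTool Output:\n{output}\n")
        self.short_memory.add(role=self.agent_name, content=tool_response)
        if self.print_on is True:
            self.pretty_print(tool_response, loop_count)
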
