Refactor Agent class to simplify tool call handling and remove unnecessary streaming coordination flags

pull/938/head
harshalmore31 1 week ago
parent a7b783f717
commit a528139cc4

@@ -573,13 +573,6 @@ class Agent:
         )
         self.summarize_multiple_images = summarize_multiple_images
         self.tool_retry_attempts = tool_retry_attempts
-        # Streaming / tool-call coordination flags
-        # When a tool call is expected we temporarily disable streaming so the
-        # LLM returns a complete JSON payload that can be parsed reliably. After
-        # the first tool call has been executed we re-enable streaming for
-        # subsequent requests / summaries.
-        self.expecting_tool_call: bool = False
-        self.tool_call_completed: bool = False
         self.speed_mode = speed_mode
         # Initialize the feedback
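
Reviewer note: the deleted comment block described a hand-rolled coordination scheme. A minimal sketch of that pre-refactor pattern, for comparison with the simplified hunks below — the class shape is invented for illustration, only the flag names and the gating condition come from the removed lines:

    # Sketch only: how the removed flags gated streaming before this commit.
    class PreRefactorStreamingGate:
        def __init__(self, streaming_on: bool, has_tools: bool):
            self.streaming_on = streaming_on
            self.has_tools = has_tools
            self.expecting_tool_call = False   # removed by this commit
            self.tool_call_completed = False   # removed by this commit

        def should_stream_this_request(self) -> bool:
            # Streaming was suppressed until the first tool call had produced a
            # complete, parseable JSON payload; afterwards it was re-enabled.
            self.expecting_tool_call = (
                self.streaming_on
                and self.has_tools
                and not self.tool_call_completed
            )
            return self.streaming_on and not self.expecting_tool_call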
@@ -1024,9 +1017,6 @@ class Agent:
             agent(task="What is the capital of France?", img="path/to/image.jpg", is_last=True)
         """
         try:
-            # Preserve the original user task so that tool summaries can reference it
-            if task is not None:
-                self.run_task = str(task)
             self.check_if_no_prompt_then_autogenerate(task)
@ -1076,13 +1066,6 @@ class Agent:
self.short_memory.return_history_as_string()
)
# Determine if this request is primarily to obtain the first tool call
if self.streaming_on and exists(self.tools) and not self.tool_call_completed:
# Disable streaming for this request so we can reliably parse JSON
self.expecting_tool_call = True
else:
self.expecting_tool_call = False
# Parameters
attempt = 0
success = False
@@ -1134,15 +1117,10 @@ class Agent:
                         # Check and execute callable tools
                         if exists(self.tools):
                             # Use standard tool execution for both streaming and non-streaming
                             self.tool_execution_retry(
                                 response, loop_count
                             )
-                            # Mark that at least one tool call has been processed
-                            self.tool_call_completed = True
-                            # Reset expecting_tool_call so subsequent requests can stream
-                            self.expecting_tool_call = False
                         # Handle MCP tools
                         if (
                             exists(self.mcp_url)
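
With the flag bookkeeping gone, the only remaining step in this hunk is the retry-wrapped tool execution. The body of tool_execution_retry is not part of this diff; a hypothetical sketch of such a wrapper, using the tool_retry_attempts setting visible in the first hunk as the attempt budget:

    # Hypothetical helper; the real tool_execution_retry implementation is not shown here.
    import logging

    def run_with_retries(execute, response, loop_count, attempts=3):
        """Call execute(response, loop_count), retrying up to `attempts` times."""
        last_error = None
        for attempt in range(1, attempts + 1):
            try:
                return execute(response, loop_count)
            except Exception as exc:  # deliberately broad in this sketch
                last_error = exc
                logging.warning(
                    "tool execution attempt %d/%d failed: %s", attempt, attempts, exc
                )
        raise last_error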
@@ -2552,10 +2530,8 @@ class Agent:
             del kwargs["is_last"]

         try:
-            # Decide whether streaming should be used for this call
-            streaming_enabled = self.streaming_on and not getattr(self, "expecting_tool_call", False)
-            # Set streaming parameter in LLM if streaming is enabled for this call
-            if streaming_enabled and hasattr(self.llm, "stream"):
+            # Set streaming parameter in LLM if streaming is enabled
+            if self.streaming_on and hasattr(self.llm, "stream"):
                 original_stream = self.llm.stream
                 self.llm.stream = True
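
The new branch turns streaming on and keeps the previous value in original_stream so it can be put back afterwards. A generic sketch of that save/enable/restore pattern — the contextmanager form is an illustration, not the code in this file:

    # Sketch: temporarily force streaming on any client exposing a `.stream` flag.
    from contextlib import contextmanager

    @contextmanager
    def force_streaming(llm):
        original_stream = getattr(llm, "stream", None)
        llm.stream = True
        try:
            yield llm
        finally:
            llm.stream = original_stream  # restore whatever the caller configured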
@@ -3004,18 +2980,12 @@ class Agent:
             raise e

     def temp_llm_instance_for_tool_summary(self):
-        """Create a temporary LiteLLM instance for the post-tool summary.
-
-        If the agent was configured with `streaming_on=True`, the summary
-        request will also stream; otherwise it will be a normal synchronous
-        call. No extra coordination flags are required.
-        """
         return LiteLLM(
             model_name=self.model_name,
             temperature=self.temperature,
             max_tokens=self.max_tokens,
             system_prompt=self.system_prompt,
-            stream=self.streaming_on,
+            stream=False,  # Always disable streaming for tool summaries
             tools_list_dictionary=None,
             parallel_tool_calls=False,
             base_url=self.llm_base_url,
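
Because the temporary instance is now built with stream=False, the summary call always returns a plain string. A usage sketch, assuming an already-constructed Agent named agent and that LiteLLM.run() accepts a single prompt string:

    # Assumed usage after this change; `agent` stands in for a configured Agent.
    temp_llm = agent.temp_llm_instance_for_tool_summary()
    summary = temp_llm.run("Summarize this tool output: ...")  # plain str, never a stream
    print(summary)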
@@ -3062,42 +3032,28 @@ class Agent:
         if self.tool_call_summary is True:
             temp_llm = self.temp_llm_instance_for_tool_summary()

-            tool_prompt = f"""
+            tool_response = temp_llm.run(
+                f"""
                 Please analyze and summarize the following tool execution output in a clear and concise way.
                 Focus on the key information and insights that would be most relevant to the user's original request.
-                {self.run_task}
                 If there are any errors or issues, highlight them prominently.

                 Tool Output:
                 {output}
                 """
+            )

-            # Stream the tool summary only if the agent is configured for streaming
-            if self.streaming_on and self.print_on:
-                # Handle streaming response with streaming panel
-                streaming_response = temp_llm.run(tool_prompt)
-
-                if hasattr(streaming_response, "__iter__") and not isinstance(streaming_response, str):
-                    # Use streaming panel directly
-                    tool_response = formatter.print_streaming_panel(
-                        streaming_response,
-                        title=f"🤖 Agent: {self.agent_name} Tool Summary",
-                        style=None,
-                        collect_chunks=True,
-                    )
-                else:
-                    # Fallback for non-streaming response
-                    tool_response = streaming_response
-
-                self.pretty_print(tool_response, loop_count)
-            else:
-                # Non-streaming response
-                tool_response = temp_llm.run(tool_prompt)
-
-                if self.print_on:
-                    self.pretty_print(tool_response, loop_count)

             self.short_memory.add(
                 role=self.agent_name,
                 content=tool_response,
             )

+            if self.print_on is True:
+                self.pretty_print(
+                    tool_response,
+                    loop_count,
+                )

     def list_output_types(self):
         return OutputType
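
Taken together, the observable behavior change is that the post-tool summary is no longer streamed, even when the agent itself streams. A hedged end-to-end sketch, using only constructor arguments that appear in this diff plus a hypothetical tool:

    # Sketch: agent responses may still stream, but after this commit the tool-call
    # summary is produced with stream=False. The tool below is hypothetical.
    from swarms import Agent

    def get_weather(city: str) -> str:
        """Hypothetical tool; a real deployment would call a weather API."""
        return f"The weather in {city} is sunny."

    agent = Agent(
        model_name="gpt-4o-mini",
        streaming_on=True,
        tools=[get_weather],
        tool_call_summary=True,
    )
    agent.run("What's the weather in Paris?")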
