stream added

pull/926/head
MalikHaroonKhokhar 1 month ago
parent b5694e26ae
commit 638e5514cd

@ -1052,34 +1052,90 @@ class Agent:
) )
self.memory_query(task_prompt) self.memory_query(task_prompt)
# This variable will hold the full, structured response object from the LLM
response_obj = None
# Check if streaming is enabled for this run
if self.streaming_on:
# This utility is needed to rebuild the full response after streaming is complete
from litellm import stream_chunk_builder
# Call our wrapper, which will now return a stream/generator
if img is not None: if img is not None:
response = self.call_llm( response_stream = self.call_llm(
task=task_prompt, img=img, *args, **kwargs
)
else:
response_stream = self.call_llm(
task=task_prompt, *args, **kwargs
)
# Prepare to collect the streaming chunks
collected_chunks = []
if not self.no_print:
# --- FIX: Manually print a header for clarity ---
print(f"--- Streaming Response from {self.agent_name} [Loop: {loop_count}] ---")
print(f"{self.agent_name}: ", end="")
try:
# Iterate through the stream in real-time
for chunk in response_stream:
collected_chunks.append(chunk)
content = chunk.choices[0].delta.content or ""
# Print each piece of text as it arrives
if content and not self.no_print:
print(content, end="", flush=True)
finally:
# --- FIX: Manually print a footer for clarity ---
if not self.no_print:
print() # Ensures we move to a new line after the stream
print("--- End of Stream ---")
# Reconstruct the full response object from all the chunks
response_obj = stream_chunk_builder(collected_chunks)
else:
# If not streaming, just get the complete response object at once
if img is not None:
response_obj = self.call_llm(
task=task_prompt, task=task_prompt,
img=img, img=img,
*args, *args,
**kwargs, **kwargs,
) )
else: else:
response = self.call_llm( response_obj = self.call_llm(
task=task_prompt, *args, **kwargs task=task_prompt, *args, **kwargs
) )
# Parse the response from the agent with the output type # From this point on, the logic is the same for both streaming and non-streaming
if exists(self.tools_list_dictionary):
if isinstance(response, BaseModel):
response = response.model_dump()
# Parse the response from the agent with the output type # 1. Get the text-only part of the response for logging and memory
response = self.parse_llm_output(response) response_text = self.parse_llm_output(response_obj)
# 2. Add the text to the agent's short-term memory
self.short_memory.add( self.short_memory.add(
role=self.agent_name, role=self.agent_name,
content=response, content=response_text,
) )
# Print # 3. If we didn't stream, print the whole response now
self.pretty_print(response, loop_count) if not self.streaming_on:
self.pretty_print(response_text, loop_count)
# 4. Pass the COMPLETE response object (with tool data) to the tool executor
if exists(self.tools):
if (
self.output_raw_json_from_tool_call
is True
):
tool_response_data = response_text
else:
tool_response_data = response_obj
self.execute_tools(
response=tool_response_data,
loop_count=loop_count,
)
# Check and execute callable tools # Check and execute callable tools
if exists(self.tools): if exists(self.tools):
@ -1087,10 +1143,13 @@ class Agent:
self.output_raw_json_from_tool_call self.output_raw_json_from_tool_call
is True is True
): ):
response = response tool_response_data = response_text
else: else:
# The standard, robust way using the full object
tool_response_data = response_obj
self.execute_tools( self.execute_tools(
response=response, response=tool_response_data,
loop_count=loop_count, loop_count=loop_count,
) )

@ -449,7 +449,12 @@ class LiteLLM:
# Make the completion call # Make the completion call
response = completion(**completion_params) response = completion(**completion_params)
# Handle tool-based response # If streaming is enabled, return the response generator directly.
# The Agent class will handle iterating through it.
if self.stream:
return response
# If not streaming, process the complete response object as before.
if self.tools_list_dictionary is not None: if self.tools_list_dictionary is not None:
return self.output_for_tools(response) return self.output_for_tools(response)
elif self.return_all is True: elif self.return_all is True:

Loading…
Cancel
Save