cleanup litellm_wrapper

pull/938/head
harshalmore31 2 months ago
parent 8cc1b67c85
commit f73cdc3934

@@ -512,10 +512,21 @@ class LiteLLM:
                 f"Model {self.model_name} does not support vision"
             )

+    def _collect_streaming_chunks(self, streaming_response, callback=None):
+        """Helper method to collect chunks from streaming response."""
+        chunks = []
+        for chunk in streaming_response:
+            if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                chunks.append(content)
+                if callback:
+                    callback(content)
+        return "".join(chunks)
+
     def _handle_streaming_response(
         self,
         streaming_response,
-        title: str = "🤖 LLM Response",
+        title: str = "LLM Response",
         style: Optional[str] = None,
         streaming_callback: Optional[Callable[[str], None]] = None,
         print_on: bool = True,
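
The new helper centralizes the chunk-collection loop that was previously duplicated across the callback and silent paths. It assumes OpenAI-style chunks exposing choices[0].delta.content; below is a minimal self-contained sketch of the same pattern, with SimpleNamespace standing in for real litellm chunk objects (the fake stream is illustrative only, not part of this change):

from types import SimpleNamespace

def collect_streaming_chunks(streaming_response, callback=None):
    # Accumulate text deltas; invoke the optional callback per chunk.
    chunks = []
    for chunk in streaming_response:
        if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
            content = chunk.choices[0].delta.content
            chunks.append(content)
            if callback:
                callback(content)
    return "".join(chunks)

# Fake OpenAI-style chunks for a quick local check.
fake_stream = [
    SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content=text))])
    for text in ("Hello", ", ", "world")
]
assert collect_streaming_chunks(fake_stream) == "Hello, world"
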
@@ -535,50 +546,35 @@ class LiteLLM:
         Returns:
             str: The complete response string
         """
+        # Non-streaming response - return as is
+        if not (hasattr(streaming_response, "__iter__") and not isinstance(streaming_response, str)):
+            return streaming_response
+
+        # Handle callback streaming
+        if streaming_callback is not None:
+            return self._collect_streaming_chunks(streaming_response, streaming_callback)
+
+        # Handle silent streaming
+        if not print_on:
+            return self._collect_streaming_chunks(streaming_response)
+
+        # Handle formatted streaming with panel
         from swarms.utils.formatter import formatter
-        import json
         from loguru import logger

-        if hasattr(streaming_response, "__iter__") and not isinstance(streaming_response, str):
-            if streaming_callback is not None:
-                # Real-time callback streaming for dashboard integration
-                chunks = []
-                for chunk in streaming_response:
-                    if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        chunks.append(content)
-                        streaming_callback(content)
-                return "".join(chunks)
-            elif not print_on:
-                # Silent streaming - no printing, just collect chunks
-                chunks = []
-                for chunk in streaming_response:
-                    if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        chunks.append(content)
-                return "".join(chunks)
-            else:
-                # Collect chunks for conversation saving
         collected_chunks = []

         def on_chunk_received(chunk: str):
-                    """Callback to collect chunks as they arrive"""
             collected_chunks.append(chunk)
             if verbose:
                 logger.debug(f"Streaming chunk received: {chunk[:50]}...")

-                # Use the streaming panel to display and collect the response
-                complete_response = formatter.print_streaming_panel(
+        return formatter.print_streaming_panel(
             streaming_response,
             title=title,
             style=style,
             collect_chunks=True,
             on_chunk_callback=on_chunk_received,
         )
-
-                return complete_response
-        else:
-            # Non-streaming response or string response
-            return streaming_response

     def run_with_streaming(
         self,
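
The rewritten method now bails out early instead of nesting everything under one large if/else. One subtlety the first guard preserves: plain strings are iterable, so they must be excluded explicitly before the response is treated as a stream. A tiny standalone sketch of that check (the function name here is illustrative, not from the PR):

def is_streaming_response(resp):
    # Strings iterate character by character, so exclude them explicitly.
    return hasattr(resp, "__iter__") and not isinstance(resp, str)

assert is_streaming_response(iter(["chunk-1", "chunk-2"])) is True
assert is_streaming_response("already a complete answer") is False
assert is_streaming_response(None) is False
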
@@ -586,7 +582,7 @@ class LiteLLM:
         img: Optional[str] = None,
         audio: Optional[str] = None,
         streaming_callback: Optional[Callable[[str], None]] = None,
-        title: str = "🤖 LLM Response",
+        title: str = "LLM Response",
         style: Optional[str] = None,
         print_on: bool = True,
         verbose: bool = False,
@@ -609,20 +605,19 @@ class LiteLLM:
         Returns:
             str: The complete response
         """
-        # Enable streaming if not already set
         original_stream = self.stream
         self.stream = True

         try:
-            # Call the LLM
-            if img is not None:
-                response = self.run(task=task, img=img, audio=audio, *args, **kwargs)
-            elif audio is not None:
-                response = self.run(task=task, audio=audio, *args, **kwargs)
-            else:
-                response = self.run(task=task, *args, **kwargs)
-            # Handle the streaming response
+            # Build kwargs for run method
+            run_kwargs = {"task": task, **kwargs}
+            if img is not None:
+                run_kwargs["img"] = img
+            if audio is not None:
+                run_kwargs["audio"] = audio
+
+            response = self.run(*args, **run_kwargs)
+
             return self._handle_streaming_response(
                 response,
                 title=title,
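
Building a single run_kwargs dict replaces the three near-identical self.run(...) branches, so optional inputs are only forwarded when present. A standalone sketch of the pattern with a stub run() (the stub and argument values are placeholders, not the real method):

def run(task, img=None, audio=None, **kwargs):
    # Stub standing in for LiteLLM.run().
    return {"task": task, "img": img, "audio": audio, **kwargs}

def run_with_optional_media(task, img=None, audio=None, **kwargs):
    run_kwargs = {"task": task, **kwargs}
    if img is not None:
        run_kwargs["img"] = img
    if audio is not None:
        run_kwargs["audio"] = audio
    return run(**run_kwargs)

print(run_with_optional_media("Describe the chart", img="chart.png"))
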
@@ -632,7 +627,6 @@ class LiteLLM:
                 verbose=verbose,
             )
         finally:
-            # Restore original stream setting
             self.stream = original_stream

     def run_tool_summary_with_streaming(
@@ -656,11 +650,9 @@ class LiteLLM:
         Returns:
             str: The complete summary response
         """
-        summary_task = f"Please analyze and summarize the following tool execution output:\n\n{tool_results}"
-
         return self.run_with_streaming(
-            task=summary_task,
-            title=f"🤖 Agent: {agent_name} - Tool Summary",
+            task=f"Please analyze and summarize the following tool execution output:\n\n{tool_results}",
+            title=f"Agent: {agent_name} - Tool Summary",
             style="green",
             print_on=print_on,
             verbose=verbose,
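
A hypothetical call site for the summarization helper, assuming an initialized LiteLLM instance (the import path is inferred from the file under edit, and the agent name and tool output are invented for illustration):

from swarms.utils.litellm_wrapper import LiteLLM  # path assumed, not shown in this diff

llm = LiteLLM(model_name="gpt-4o-mini")
summary = llm.run_tool_summary_with_streaming(
    tool_results='{"status": 200, "rows_returned": 42}',
    agent_name="ResearchAgent",
)
print(summary)
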
@@ -682,56 +674,26 @@ class LiteLLM:
             print_on: Whether to print the streaming output
             delay: Delay between characters for streaming effect
         """
-        if print_on and response:
-            # Simple character-by-character streaming for string responses
-            import time
+        if not (print_on and response):
+            return

         for char in response:
             print(char, end="", flush=True)
             if delay > 0:
-                import time
                 time.sleep(delay)
         print()  # Newline at the end

-    def parse_streaming_chunks_with_tools(
-        self,
-        stream,
-        agent_name: str = "Agent",
-        print_on: bool = True,
-        verbose: bool = False,
-    ) -> tuple:
-        """
-        Parse streaming chunks and extract both text and tool calls.
-
-        Args:
-            stream: The streaming response object
-            agent_name: Name of the agent for printing
-            print_on: Whether to print streaming output
-            verbose: Whether to enable verbose logging
-
-        Returns:
-            tuple: (full_text_response, tool_calls_list)
-        """
+    def _process_anthropic_chunk(self, chunk, current_tool_call, tool_call_buffer, tool_calls_in_stream, print_on, verbose):
+        """Process Anthropic-style streaming chunks."""
         import json

-        full_text_response = ""
-        tool_calls_in_stream = []
-        current_tool_call = None
-        tool_call_buffer = ""
-
-        if print_on:
-            print(f"🤖 {agent_name}: ", end="", flush=True)
-
-        # Process streaming chunks in real-time
-        for chunk in stream:
-            if verbose:
-                logger.debug(f"Processing streaming chunk: {type(chunk)}")
-
         chunk_type = getattr(chunk, 'type', None)
+        full_text_response = ""

-            # Anthropic-style stream parsing
         if chunk_type == 'content_block_start' and hasattr(chunk, 'content_block') and chunk.content_block.type == 'tool_use':
             tool_name = chunk.content_block.name
             if print_on:
-                    print(f"\n🔧 Tool Call: {tool_name}...", flush=True)
+                print(f"\nTool Call: {tool_name}...", flush=True)
             current_tool_call = {"id": chunk.content_block.id, "name": tool_name, "input": ""}
             tool_call_buffer = ""
@@ -754,10 +716,20 @@ class LiteLLM:
             current_tool_call = None
             tool_call_buffer = ""

-            # OpenAI-style stream parsing
-            elif hasattr(chunk, 'choices') and chunk.choices:
+        return full_text_response, current_tool_call, tool_call_buffer
+
+    def _process_openai_chunk(self, chunk, tool_calls_in_stream, print_on, verbose):
+        """Process OpenAI-style streaming chunks."""
+        import json
+
+        full_text_response = ""
+
+        if not (hasattr(chunk, 'choices') and chunk.choices):
+            return full_text_response
+
         choice = chunk.choices[0]
-                if hasattr(choice, 'delta') and choice.delta:
+        if not (hasattr(choice, 'delta') and choice.delta):
+            return full_text_response
+
         delta = choice.delta

         # Handle text content
@@ -782,7 +754,7 @@ class LiteLLM:
                 # Create new tool call if slot is empty and we have a function name
                 if tool_calls_in_stream[tool_index] is None and hasattr(func, 'name') and func.name:
                     if print_on:
-                        print(f"\n🔧 Tool Call: {func.name}...", flush=True)
+                        print(f"\nTool Call: {func.name}...", flush=True)
                     tool_calls_in_stream[tool_index] = {
                         "id": getattr(tool_call, 'id', f"call_{tool_index}"),
                         "name": func.name,
@@ -806,6 +778,54 @@ class LiteLLM:
                     except json.JSONDecodeError:
                         pass

+        return full_text_response
+
+    def parse_streaming_chunks_with_tools(
+        self,
+        stream,
+        agent_name: str = "Agent",
+        print_on: bool = True,
+        verbose: bool = False,
+    ) -> tuple:
+        """
+        Parse streaming chunks and extract both text and tool calls.
+
+        Args:
+            stream: The streaming response object
+            agent_name: Name of the agent for printing
+            print_on: Whether to print streaming output
+            verbose: Whether to enable verbose logging
+
+        Returns:
+            tuple: (full_text_response, tool_calls_list)
+        """
+        full_text_response = ""
+        tool_calls_in_stream = []
+        current_tool_call = None
+        tool_call_buffer = ""
+
+        if print_on:
+            print(f"{agent_name}: ", end="", flush=True)
+
+        # Process streaming chunks in real-time
+        for chunk in stream:
+            if verbose:
+                logger.debug(f"Processing streaming chunk: {type(chunk)}")
+
+            # Try Anthropic-style processing first
+            anthropic_result = self._process_anthropic_chunk(
+                chunk, current_tool_call, tool_call_buffer, tool_calls_in_stream, print_on, verbose
+            )
+            if anthropic_result[0]:  # If text was processed
+                text_chunk, current_tool_call, tool_call_buffer = anthropic_result
+                full_text_response += text_chunk
+                continue
+
+            # If not Anthropic, try OpenAI-style processing
+            openai_text = self._process_openai_chunk(chunk, tool_calls_in_stream, print_on, verbose)
+            if openai_text:
+                full_text_response += openai_text
+
         if print_on:
             print()  # Newline after streaming text
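
A hypothetical call site for the refactored orchestrator, assuming llm is an initialized LiteLLM instance and stream is a streaming completion it produced (both are placeholders, not defined in this diff); per the docstring, the method returns the accumulated text plus any tool calls it parsed:

text, tool_calls = llm.parse_streaming_chunks_with_tools(
    stream,
    agent_name="ResearchAgent",
    print_on=False,
    verbose=False,
)
if tool_calls:
    print("Requested tools:", [call["name"] for call in tool_calls])
else:
    print(text)
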
