|
|
@ -176,134 +176,6 @@ class LiteLLM:
|
|
|
|
|
|
|
|
|
|
|
|
litellm.drop_params = True
|
|
|
|
litellm.drop_params = True
|
|
|
|
|
|
|
|
|
|
|
|
# def _collect_streaming_response(self, streaming_response):
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# Parse and yield individual content chunks from a streaming response.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Args:
|
|
|
|
|
|
|
|
# streaming_response: The streaming response object from litellm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Yields:
|
|
|
|
|
|
|
|
# str: Individual content chunks as they arrive
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# try:
|
|
|
|
|
|
|
|
# for chunk in streaming_response:
|
|
|
|
|
|
|
|
# content = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Handle different chunk formats
|
|
|
|
|
|
|
|
# if hasattr(chunk, 'choices') and chunk.choices:
|
|
|
|
|
|
|
|
# choice = chunk.choices[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # OpenAI-style chunks
|
|
|
|
|
|
|
|
# if hasattr(choice, 'delta') and choice.delta:
|
|
|
|
|
|
|
|
# if hasattr(choice.delta, 'content') and choice.delta.content:
|
|
|
|
|
|
|
|
# content = choice.delta.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Alternative chunk format
|
|
|
|
|
|
|
|
# elif hasattr(choice, 'message') and choice.message:
|
|
|
|
|
|
|
|
# if hasattr(choice.message, 'content') and choice.message.content:
|
|
|
|
|
|
|
|
# content = choice.message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Anthropic-style chunks
|
|
|
|
|
|
|
|
# elif hasattr(chunk, 'type'):
|
|
|
|
|
|
|
|
# if chunk.type == 'content_block_delta' and hasattr(chunk, 'delta'):
|
|
|
|
|
|
|
|
# if chunk.delta.type == 'text_delta':
|
|
|
|
|
|
|
|
# content = chunk.delta.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Handle direct content chunks
|
|
|
|
|
|
|
|
# elif hasattr(chunk, 'content'):
|
|
|
|
|
|
|
|
# content = chunk.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Yield content chunk if we found any
|
|
|
|
|
|
|
|
# if content:
|
|
|
|
|
|
|
|
# yield content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# except Exception as e:
|
|
|
|
|
|
|
|
# logger.error(f"Error parsing streaming chunks: {e}")
|
|
|
|
|
|
|
|
# return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# async def _collect_streaming_response_async(self, streaming_response):
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# Parse and yield individual content chunks from an async streaming response.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Args:
|
|
|
|
|
|
|
|
# streaming_response: The async streaming response object from litellm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Yields:
|
|
|
|
|
|
|
|
# str: Individual content chunks as they arrive
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# try:
|
|
|
|
|
|
|
|
# async for chunk in streaming_response:
|
|
|
|
|
|
|
|
# content = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Handle different chunk formats
|
|
|
|
|
|
|
|
# if hasattr(chunk, 'choices') and chunk.choices:
|
|
|
|
|
|
|
|
# choice = chunk.choices[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # OpenAI-style chunks
|
|
|
|
|
|
|
|
# if hasattr(choice, 'delta') and choice.delta:
|
|
|
|
|
|
|
|
# if hasattr(choice.delta, 'content') and choice.delta.content:
|
|
|
|
|
|
|
|
# content = choice.delta.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Alternative chunk format
|
|
|
|
|
|
|
|
# elif hasattr(choice, 'message') and choice.message:
|
|
|
|
|
|
|
|
# if hasattr(choice.message, 'content') and choice.message.content:
|
|
|
|
|
|
|
|
# content = choice.message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Anthropic-style chunks
|
|
|
|
|
|
|
|
# elif hasattr(chunk, 'type'):
|
|
|
|
|
|
|
|
# if chunk.type == 'content_block_delta' and hasattr(chunk, 'delta'):
|
|
|
|
|
|
|
|
# if chunk.delta.type == 'text_delta':
|
|
|
|
|
|
|
|
# content = chunk.delta.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Handle direct content chunks
|
|
|
|
|
|
|
|
# elif hasattr(chunk, 'content'):
|
|
|
|
|
|
|
|
# content = chunk.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Yield content chunk if we found any
|
|
|
|
|
|
|
|
# if content:
|
|
|
|
|
|
|
|
# yield content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# except Exception as e:
|
|
|
|
|
|
|
|
# logger.error(f"Error parsing async streaming chunks: {e}")
|
|
|
|
|
|
|
|
# return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def collect_all_chunks(self, streaming_response):
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# Helper method to collect all chunks from a streaming response into a complete text.
|
|
|
|
|
|
|
|
# This provides backward compatibility for code that expects a complete response.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Args:
|
|
|
|
|
|
|
|
# streaming_response: The streaming response object from litellm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Returns:
|
|
|
|
|
|
|
|
# str: The complete response text collected from all chunks
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# chunks = []
|
|
|
|
|
|
|
|
# for chunk in self._collect_streaming_response(streaming_response):
|
|
|
|
|
|
|
|
# chunks.append(chunk)
|
|
|
|
|
|
|
|
# complete_response = "".join(chunks)
|
|
|
|
|
|
|
|
# logger.info(f"Collected complete streaming response: {len(complete_response)} characters")
|
|
|
|
|
|
|
|
# return complete_response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# async def collect_all_chunks_async(self, streaming_response):
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# Helper method to collect all chunks from an async streaming response into a complete text.
|
|
|
|
|
|
|
|
# This provides backward compatibility for code that expects a complete response.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Args:
|
|
|
|
|
|
|
|
# streaming_response: The async streaming response object from litellm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Returns:
|
|
|
|
|
|
|
|
# str: The complete response text collected from all chunks
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
# chunks = []
|
|
|
|
|
|
|
|
# async for chunk in self._collect_streaming_response_async(streaming_response):
|
|
|
|
|
|
|
|
# chunks.append(chunk)
|
|
|
|
|
|
|
|
# complete_response = "".join(chunks)
|
|
|
|
|
|
|
|
# logger.info(f"Collected complete async streaming response: {len(complete_response)} characters")
|
|
|
|
|
|
|
|
# return complete_response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Store additional args and kwargs for use in run method
|
|
|
|
# Store additional args and kwargs for use in run method
|
|
|
|
self.init_args = args
|
|
|
|
self.init_args = args
|
|
|
|
self.init_kwargs = kwargs
|
|
|
|
self.init_kwargs = kwargs
|
|
|
|