cleanup litellm_wrapper

pull/938/head
harshalmore31 2 months ago
parent 8cc1b67c85
commit f73cdc3934

@@ -512,10 +512,21 @@ class LiteLLM:
                 f"Model {self.model_name} does not support vision"
             )

+    def _collect_streaming_chunks(self, streaming_response, callback=None):
+        """Helper method to collect chunks from streaming response."""
+        chunks = []
+        for chunk in streaming_response:
+            if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                chunks.append(content)
+                if callback:
+                    callback(content)
+        return "".join(chunks)
+
     def _handle_streaming_response(
         self,
         streaming_response,
-        title: str = "🤖 LLM Response",
+        title: str = "LLM Response",
         style: Optional[str] = None,
         streaming_callback: Optional[Callable[[str], None]] = None,
         print_on: bool = True,
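
The new helper centralizes the chunk-collection loop that was previously duplicated across the callback and silent paths. It assumes OpenAI-style chunks exposing choices[0].delta.content; below is a minimal self-contained sketch of the same pattern, with SimpleNamespace standing in for real litellm chunk objects (the fake stream is illustrative only, not part of this change):

from types import SimpleNamespace

def collect_streaming_chunks(streaming_response, callback=None):
    # Accumulate text deltas; invoke the optional callback per chunk.
    chunks = []
    for chunk in streaming_response:
        if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
            content = chunk.choices[0].delta.content
            chunks.append(content)
            if callback:
                callback(content)
    return "".join(chunks)

# Fake OpenAI-style chunks for a quick local check.
fake_stream = [
    SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content=text))])
    for text in ("Hello", ", ", "world")
]
assert collect_streaming_chunks(fake_stream) == "Hello, world"
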
@@ -535,50 +546,35 @@ class LiteLLM:
         Returns:
             str: The complete response string
         """
+        # Non-streaming response - return as is
+        if not (hasattr(streaming_response, "__iter__") and not isinstance(streaming_response, str)):
+            return streaming_response
+
+        # Handle callback streaming
+        if streaming_callback is not None:
+            return self._collect_streaming_chunks(streaming_response, streaming_callback)
+
+        # Handle silent streaming
+        if not print_on:
+            return self._collect_streaming_chunks(streaming_response)
+
+        # Handle formatted streaming with panel
         from swarms.utils.formatter import formatter
-        import json
         from loguru import logger

-        if hasattr(streaming_response, "__iter__") and not isinstance(streaming_response, str):
-            if streaming_callback is not None:
-                # Real-time callback streaming for dashboard integration
-                chunks = []
-                for chunk in streaming_response:
-                    if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        chunks.append(content)
-                        streaming_callback(content)
-                return "".join(chunks)
-            elif not print_on:
-                # Silent streaming - no printing, just collect chunks
-                chunks = []
-                for chunk in streaming_response:
-                    if hasattr(chunk, "choices") and chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        chunks.append(content)
-                return "".join(chunks)
-            else:
-                # Collect chunks for conversation saving
         collected_chunks = []

         def on_chunk_received(chunk: str):
-                    """Callback to collect chunks as they arrive"""
             collected_chunks.append(chunk)
             if verbose:
                 logger.debug(f"Streaming chunk received: {chunk[:50]}...")

-                # Use the streaming panel to display and collect the response
-                complete_response = formatter.print_streaming_panel(
+        return formatter.print_streaming_panel(
             streaming_response,
             title=title,
             style=style,
             collect_chunks=True,
             on_chunk_callback=on_chunk_received,
         )
-
-                return complete_response
-        else:
-            # Non-streaming response or string response
-            return streaming_response

     def run_with_streaming(
         self,
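
The rewritten method now bails out early instead of nesting everything under one large if/else. One subtlety the first guard preserves: plain strings are iterable, so they must be excluded explicitly before the response is treated as a stream. A tiny standalone sketch of that check (the function name here is illustrative, not from the PR):

def is_streaming_response(resp):
    # Strings iterate character by character, so exclude them explicitly.
    return hasattr(resp, "__iter__") and not isinstance(resp, str)

assert is_streaming_response(iter(["chunk-1", "chunk-2"])) is True
assert is_streaming_response("already a complete answer") is False
assert is_streaming_response(None) is False
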
@@ -586,7 +582,7 @@ class LiteLLM:
         img: Optional[str] = None,
         audio: Optional[str] = None,
         streaming_callback: Optional[Callable[[str], None]] = None,
-        title: str = "🤖 LLM Response",
+        title: str = "LLM Response",
         style: Optional[str] = None,
         print_on: bool = True,
         verbose: bool = False,
@@ -609,20 +605,19 @@ class LiteLLM:
         Returns:
             str: The complete response
         """
-        # Enable streaming if not already set
         original_stream = self.stream
         self.stream = True

         try:
-            # Call the LLM
-            if img is not None:
-                response = self.run(task=task, img=img, audio=audio, *args, **kwargs)
-            elif audio is not None:
-                response = self.run(task=task, audio=audio, *args, **kwargs)
-            else:
-                response = self.run(task=task, *args, **kwargs)
-            # Handle the streaming response
+            # Build kwargs for run method
+            run_kwargs = {"task": task, **kwargs}
+            if img is not None:
+                run_kwargs["img"] = img
+            if audio is not None:
+                run_kwargs["audio"] = audio
+
+            response = self.run(*args, **run_kwargs)
+
             return self._handle_streaming_response(
                 response,
                 title=title,
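
Building a single run_kwargs dict replaces the three near-identical self.run(...) branches, so optional inputs are only forwarded when present. A standalone sketch of the pattern with a stub run() (the stub and argument values are placeholders, not the real method):

def run(task, img=None, audio=None, **kwargs):
    # Stub standing in for LiteLLM.run().
    return {"task": task, "img": img, "audio": audio, **kwargs}

def run_with_optional_media(task, img=None, audio=None, **kwargs):
    run_kwargs = {"task": task, **kwargs}
    if img is not None:
        run_kwargs["img"] = img
    if audio is not None:
        run_kwargs["audio"] = audio
    return run(**run_kwargs)

print(run_with_optional_media("Describe the chart", img="chart.png"))
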
@@ -632,7 +627,6 @@ class LiteLLM:
                 verbose=verbose,
             )
         finally:
-            # Restore original stream setting
             self.stream = original_stream

     def run_tool_summary_with_streaming(
@@ -656,11 +650,9 @@ class LiteLLM:
         Returns:
             str: The complete summary response
         """
-        summary_task = f"Please analyze and summarize the following tool execution output:\n\n{tool_results}"
-
         return self.run_with_streaming(
-            task=summary_task,
-            title=f"🤖 Agent: {agent_name} - Tool Summary",
+            task=f"Please analyze and summarize the following tool execution output:\n\n{tool_results}",
+            title=f"Agent: {agent_name} - Tool Summary",
             style="green",
             print_on=print_on,
             verbose=verbose,
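
A hypothetical call site for the summarization helper, assuming an initialized LiteLLM instance (the import path is inferred from the file under edit, and the agent name and tool output are invented for illustration):

from swarms.utils.litellm_wrapper import LiteLLM  # path assumed, not shown in this diff

llm = LiteLLM(model_name="gpt-4o-mini")
summary = llm.run_tool_summary_with_streaming(
    tool_results='{"status": 200, "rows_returned": 42}',
    agent_name="ResearchAgent",
)
print(summary)
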
@@ -682,56 +674,26 @@ class LiteLLM:
             print_on: Whether to print the streaming output
             delay: Delay between characters for streaming effect
         """
-        if print_on and response:
-            # Simple character-by-character streaming for string responses
-            import time
+        if not (print_on and response):
+            return

         for char in response:
             print(char, end="", flush=True)
             if delay > 0:
-                import time
                 time.sleep(delay)
         print()  # Newline at the end

-    def parse_streaming_chunks_with_tools(
-        self,
-        stream,
-        agent_name: str = "Agent",
-        print_on: bool = True,
-        verbose: bool = False,
-    ) -> tuple:
-        """
-        Parse streaming chunks and extract both text and tool calls.
-
-        Args:
-            stream: The streaming response object
-            agent_name: Name of the agent for printing
-            print_on: Whether to print streaming output
-            verbose: Whether to enable verbose logging
-
-        Returns:
-            tuple: (full_text_response, tool_calls_list)
-        """
+    def _process_anthropic_chunk(self, chunk, current_tool_call, tool_call_buffer, tool_calls_in_stream, print_on, verbose):
+        """Process Anthropic-style streaming chunks."""
         import json

-        full_text_response = ""
-        tool_calls_in_stream = []
-        current_tool_call = None
-        tool_call_buffer = ""
-
-        if print_on:
-            print(f"🤖 {agent_name}: ", end="", flush=True)
-
-        # Process streaming chunks in real-time
-        for chunk in stream:
-            if verbose:
-                logger.debug(f"Processing streaming chunk: {type(chunk)}")
-
         chunk_type = getattr(chunk, 'type', None)
+        full_text_response = ""

-            # Anthropic-style stream parsing
         if chunk_type == 'content_block_start' and hasattr(chunk, 'content_block') and chunk.content_block.type == 'tool_use':
             tool_name = chunk.content_block.name
             if print_on:
-                    print(f"\n🔧 Tool Call: {tool_name}...", flush=True)
+                print(f"\nTool Call: {tool_name}...", flush=True)
             current_tool_call = {"id": chunk.content_block.id, "name": tool_name, "input": ""}
             tool_call_buffer = ""
@@ -754,10 +716,20 @@ class LiteLLM:
             current_tool_call = None
             tool_call_buffer = ""

-            # OpenAI-style stream parsing
-            elif hasattr(chunk, 'choices') and chunk.choices:
+        return full_text_response, current_tool_call, tool_call_buffer
+
+    def _process_openai_chunk(self, chunk, tool_calls_in_stream, print_on, verbose):
+        """Process OpenAI-style streaming chunks."""
+        import json
+
+        full_text_response = ""
+
+        if not (hasattr(chunk, 'choices') and chunk.choices):
+            return full_text_response
+
         choice = chunk.choices[0]
-                if hasattr(choice, 'delta') and choice.delta:
+        if not (hasattr(choice, 'delta') and choice.delta):
+            return full_text_response
+
         delta = choice.delta

         # Handle text content
@@ -782,7 +754,7 @@ class LiteLLM:
                 # Create new tool call if slot is empty and we have a function name
                 if tool_calls_in_stream[tool_index] is None and hasattr(func, 'name') and func.name:
                     if print_on:
-                        print(f"\n🔧 Tool Call: {func.name}...", flush=True)
+                        print(f"\nTool Call: {func.name}...", flush=True)
                     tool_calls_in_stream[tool_index] = {
                         "id": getattr(tool_call, 'id', f"call_{tool_index}"),
                         "name": func.name,
@@ -806,6 +778,54 @@ class LiteLLM:
                     except json.JSONDecodeError:
                         pass

+        return full_text_response
+
+    def parse_streaming_chunks_with_tools(
+        self,
+        stream,
+        agent_name: str = "Agent",
+        print_on: bool = True,
+        verbose: bool = False,
+    ) -> tuple:
+        """
+        Parse streaming chunks and extract both text and tool calls.
+
+        Args:
+            stream: The streaming response object
+            agent_name: Name of the agent for printing
+            print_on: Whether to print streaming output
+            verbose: Whether to enable verbose logging
+
+        Returns:
+            tuple: (full_text_response, tool_calls_list)
+        """
+        full_text_response = ""
+        tool_calls_in_stream = []
+        current_tool_call = None
+        tool_call_buffer = ""
+
+        if print_on:
+            print(f"{agent_name}: ", end="", flush=True)
+
+        # Process streaming chunks in real-time
+        for chunk in stream:
+            if verbose:
+                logger.debug(f"Processing streaming chunk: {type(chunk)}")
+
+            # Try Anthropic-style processing first
+            anthropic_result = self._process_anthropic_chunk(
+                chunk, current_tool_call, tool_call_buffer, tool_calls_in_stream, print_on, verbose
+            )
+            if anthropic_result[0]:  # If text was processed
+                text_chunk, current_tool_call, tool_call_buffer = anthropic_result
+                full_text_response += text_chunk
+                continue
+
+            # If not Anthropic, try OpenAI-style processing
+            openai_text = self._process_openai_chunk(chunk, tool_calls_in_stream, print_on, verbose)
+            if openai_text:
+                full_text_response += openai_text
+
         if print_on:
             print()  # Newline after streaming text
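
A hypothetical call site for the refactored orchestrator, assuming llm is an initialized LiteLLM instance and stream is a streaming completion it produced (both are placeholders, not defined in this diff); per the docstring, the method returns the accumulated text plus any tool calls it parsed:

text, tool_calls = llm.parse_streaming_chunks_with_tools(
    stream,
    agent_name="ResearchAgent",
    print_on=False,
    verbose=False,
)
if tool_calls:
    print("Requested tools:", [call["name"] for call in tool_calls])
else:
    print(text)
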
