[FEAT][llm_metrics_decorator]

pull/334/head
Kye 1 year ago
parent 4557e1c3fb
commit 950907581c

@@ -1,9 +1,11 @@
import asyncio
import logging
import os
import time
from abc import ABC, abstractmethod
from typing import List, Optional

from swarms.utils.llm_metrcs_decorator import metrics_decorator


def count_tokens(text: str) -> int:
@@ -118,6 +120,7 @@ class AbstractLLM(ABC):
        }

    @abstractmethod
    @metrics_decorator
    def run(self, task: Optional[str] = None, *args, **kwargs) -> str:
        """generate text using language model"""
        pass
@@ -381,3 +384,48 @@ class AbstractLLM(ABC):
        TOKENS: {_num_tokens}
        Tokens/SEC: {_time_for_generation}
        """

    def time_to_first_token(self, prompt: str) -> float:
        """Measure the time until the first token is produced.

        Args:
            prompt (str): The prompt to send to the model.

        Returns:
            float: Seconds elapsed before the first token appears.
        """
        start_time = time.time()
        # Assumes track_resource_utilization drives token generation;
        # without streaming, this actually times the full blocking call.
        self.track_resource_utilization(prompt)
        first_token_time = time.time()
        return first_token_time - start_time

    def generation_latency(self, prompt: str) -> float:
        """Measure the end-to-end latency of one generation.

        Args:
            prompt (str): The prompt to send to the model.

        Returns:
            float: Seconds taken to complete the full generation.
        """
        start_time = time.time()
        self.run(prompt)
        end_time = time.time()
        return end_time - start_time

    def throughput(self, prompts: List[str]) -> float:
        """Measure throughput over a batch of prompts.

        Args:
            prompts (List[str]): Prompts to run sequentially.

        Returns:
            float: Prompts completed per second.
        """
        start_time = time.time()
        for prompt in prompts:
            self.run(prompt)
        end_time = time.time()
        return len(prompts) / (end_time - start_time)
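For orientation, here is a minimal sketch of how these helpers could be exercised. `EchoLLM`, its simulated latency, and the assumption that `run` is the only abstract method on `AbstractLLM` are illustrative, not part of this commit. Note also that a subclass overriding `run` replaces the decorated abstract method, so it would need to re-apply `@metrics_decorator` itself to get the metrics report.

```python
import time
from typing import Optional

# AbstractLLM as defined in the module above; assumed to need no
# required constructor arguments.


class EchoLLM(AbstractLLM):
    """Toy model: 'generates' by echoing the prompt after a short sleep."""

    def run(self, task: Optional[str] = None, *args, **kwargs) -> str:
        time.sleep(0.05)  # simulate model latency
        return task or ""


llm = EchoLLM()
print(llm.generation_latency("hello world"))  # ~0.05 seconds
print(llm.throughput(["a", "b", "c", "d"]))   # ~20 prompts/sec
```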

@@ -3,4 +3,9 @@ from swarms.structs.sequential_workflow import SequentialWorkflow
from swarms.structs.autoscaler import AutoScaler
from swarms.structs.conversation import Conversation

__all__ = [
    "Agent",
    "SequentialWorkflow",
    "AutoScaler",
    "Conversation",
]

@@ -9,22 +9,23 @@ from swarms.structs.base import BaseStructure
class Conversation(BaseStructure):
    """
    Conversation class

    Attributes:
        time_enabled (bool): whether to enable time
        conversation_history (list): list of messages in the conversation

    Examples:
        >>> conv = Conversation()
        >>> conv.add("user", "Hello, world!")
        >>> conv.add("assistant", "Hello, user!")
        >>> conv.display_conversation()
        user: Hello, world!
    """

    def __init__(self, time_enabled: bool = False, *args, **kwargs):
        super().__init__()
        self.time_enabled = time_enabled
@@ -186,6 +187,7 @@ class Conversation(BaseStructure):
        # Load the conversation history from a JSON file
        with open(filename, "r") as f:
            self.conversation_history = json.load(f)

    def search_keyword_in_conversation(self, keyword: str):
        """Search for a keyword in the conversation history

@@ -5,6 +5,7 @@ from swarms.utils.parse_code import (
)
from swarms.utils.pdf_to_text import pdf_to_text
from swarms.utils.math_eval import math_eval
from swarms.utils.llm_metrcs_decorator import metrics_decorator

# from swarms.utils.phoenix_handler import phoenix_trace_decorator

@@ -15,4 +16,5 @@ __all__ = [
    "pdf_to_text",
    # "phoenix_trace_decorator",
    "math_eval",
    "metrics_decorator",
]

@@ -0,0 +1,39 @@
import time
from functools import wraps
from typing import Callable


def metrics_decorator(func: Callable):
    """Metrics decorator for LLM calls.

    Args:
        func (Callable): The function to decorate.

    Example:
        >>> @metrics_decorator
        ... def my_function():
        ...     return "Hello, world!"
        >>> my_function()
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # *args carries `self` when decorating a method and nothing for
        # a plain function, so the wrapper works for both.
        # Time to First Token: captured only after the call returns, so
        # without streaming it equals the full generation latency.
        start_time = time.time()
        result = func(*args, **kwargs)
        first_token_time = time.time()

        # Generation Latency
        end_time = time.time()

        # Throughput (assumes the function returns a sequence of tokens)
        throughput = len(result) / (end_time - start_time)

        return f"""
Time to First Token: {first_token_time - start_time}
Generation Latency: {end_time - start_time}
Throughput: {throughput}
"""

    return wrapper
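One limitation worth flagging: because the wrapper only sees the completed result, the reported Time to First Token is indistinguishable from the total latency. Below is a hedged sketch of how a genuine first-token timestamp could be captured if the wrapped callable yielded tokens incrementally; `streaming_metrics_decorator` is hypothetical and not part of this commit.

```python
import time
from functools import wraps
from typing import Callable, Iterable


def streaming_metrics_decorator(func: Callable[..., Iterable[str]]):
    """Variant for callables that *yield* tokens one at a time."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        first_token_at = None
        tokens = []
        for token in func(*args, **kwargs):
            if first_token_at is None:
                first_token_at = time.time()  # real first-token timestamp
            tokens.append(token)
        end = time.time()
        return {
            "time_to_first_token": (first_token_at or end) - start,
            "generation_latency": end - start,
            "throughput": len(tokens) / (end - start),
        }

    return wrapper
```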

@@ -0,0 +1,63 @@
import time

from swarms.utils.llm_metrcs_decorator import metrics_decorator


def test_metrics_decorator():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return list(range(100))  # return a list of 100 tokens

    result = test_func()
    lines = result.strip().split("\n")

    # Check that the decorator returns 3 lines of output
    assert len(lines) == 3

    # Time to First Token should not exceed the Generation Latency
    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency

    # Throughput should be approximately tokens / Generation Latency
    throughput = float(lines[2].split(": ")[1])
    assert (
        abs(throughput - 100 / generation_latency) < 0.01
    )  # allow for a small amount of error
def test_metrics_decorator_1_token():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return [0]  # return a list of 1 token

    result = test_func()
    lines = result.strip().split("\n")
    assert len(lines) == 3

    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency

    throughput = float(lines[2].split(": ")[1])
    assert abs(throughput - 1 / generation_latency) < 0.01


# Repeat the test with different numbers of tokens and different amounts of work
for i in range(2, 17):

    def _make_test(n=i):  # bind i now; a bare closure would see the final i
        def test_func():
            @metrics_decorator
            def generate():
                time.sleep(0.01 * n)  # simulate some work
                return list(range(n))  # return a list of n tokens

            result = generate()
            lines = result.strip().split("\n")
            assert len(lines) == 3

            time_to_first_token = float(lines[0].split(": ")[1])
            generation_latency = float(lines[1].split(": ")[1])
            assert time_to_first_token <= generation_latency

            throughput = float(lines[2].split(": ")[1])
            assert abs(throughput - n / generation_latency) < 0.01

        return test_func

    globals()[f"test_metrics_decorator_{i}_tokens"] = _make_test()