[FEAT][llm_metrics_decorator]

pull/334/head
Kye 1 year ago
parent 4557e1c3fb
commit 950907581c

@@ -1,9 +1,11 @@
import os
import asyncio
import logging
import os
import time
from abc import ABC, abstractmethod
from typing import Optional, List
import asyncio
from typing import List, Optional
from swarms.utils.llm_metrcs_decorator import metrics_decorator
def count_tokens(text: str) -> int:
@@ -118,6 +120,7 @@ class AbstractLLM(ABC):
}
    @abstractmethod
    @metrics_decorator
    def run(self, task: Optional[str] = None, *args, **kwargs) -> str:
        """Generate text using the language model."""
        pass
@@ -381,3 +384,48 @@ class AbstractLLM(ABC):
TOKENS: {_num_tokens}
Tokens/SEC: {_time_for_generation}
"""
    def time_to_first_token(self, prompt: str) -> float:
        """Measure the time until the first token is produced for a prompt.

        Args:
            prompt (str): The prompt to generate from.

        Returns:
            float: Seconds elapsed before the first token is available.
        """
        start_time = time.time()
        tokens = self.track_resource_utilization(
            prompt
        )  # assuming this call triggers token generation
        first_token_time = time.time()
        return first_token_time - start_time

    def generation_latency(self, prompt: str) -> float:
        """Measure the end-to-end generation latency for a prompt.

        Args:
            prompt (str): The prompt to generate from.

        Returns:
            float: Total generation time in seconds.
        """
        start_time = time.time()
        tokens = self.run(prompt)
        end_time = time.time()
        return end_time - start_time

    def throughput(self, prompts: List[str]) -> float:
        """Measure throughput over a batch of prompts.

        Args:
            prompts (List[str]): The prompts to run sequentially.

        Returns:
            float: Prompts processed per second.
        """
        start_time = time.time()
        for prompt in prompts:
            tokens = self.run(prompt)
        end_time = time.time()
        return len(prompts) / (end_time - start_time)
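
A minimal usage sketch of the new metric helpers. The EchoLLM subclass below is hypothetical and not part of this commit; it assumes run is the only abstract method on AbstractLLM and that the base constructor needs no required arguments:

from typing import Optional

class EchoLLM(AbstractLLM):
    """Hypothetical subclass used only to illustrate the metric helpers."""

    def run(self, task: Optional[str] = None, *args, **kwargs) -> str:
        # Pretend generation: echo the task back as the "completion"
        return task or ""

llm = EchoLLM()  # assumes AbstractLLM() needs no required constructor arguments
print(llm.generation_latency("Hello, world!"))  # seconds for a single run() call
print(llm.throughput(["Hello", "How are you?"]))  # prompts processed per second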

@@ -3,4 +3,9 @@ from swarms.structs.sequential_workflow import SequentialWorkflow
from swarms.structs.autoscaler import AutoScaler
from swarms.structs.conversation import Conversation
__all__ = ["Agent", "SequentialWorkflow", "AutoScaler", "Conversation"]
__all__ = [
    "Agent",
    "SequentialWorkflow",
    "AutoScaler",
    "Conversation",
]

@@ -9,22 +9,23 @@ from swarms.structs.base import BaseStructure
class Conversation(BaseStructure):
    """
    Conversation class

    Attributes:
        time_enabled (bool): whether to enable time
        conversation_history (list): list of messages in the conversation

    Examples:
        >>> conv = Conversation()
        >>> conv.add("user", "Hello, world!")
        >>> conv.add("assistant", "Hello, user!")
        >>> conv.display_conversation()
        user: Hello, world!
    """

    def __init__(self, time_enabled: bool = False, *args, **kwargs):
        super().__init__()
        self.time_enabled = time_enabled
@@ -186,6 +187,7 @@ class Conversation(BaseStructure):
        # Load the conversation history from a JSON file
        with open(filename, "r") as f:
            self.conversation_history = json.load(f)

    def search_keyword_in_conversation(self, keyword: str):
        """Search for a keyword in the conversation history

@@ -5,6 +5,7 @@ from swarms.utils.parse_code import (
)
from swarms.utils.pdf_to_text import pdf_to_text
from swarms.utils.math_eval import math_eval
from swarms.utils.llm_metrcs_decorator import metrics_decorator
# from swarms.utils.phoenix_handler import phoenix_trace_decorator
@@ -15,4 +16,5 @@ __all__ = [
"pdf_to_text",
# "phoenix_trace_decorator",
"math_eval",
"metrics_decorator",
]

@@ -0,0 +1,39 @@
import time
from functools import wraps
from typing import Callable
def metrics_decorator(func: Callable):
    """Metrics decorator for LLM generation methods.

    Args:
        func (Callable): The function to decorate.

    Example:
        >>> @metrics_decorator
        ... def my_function():
        ...     return "Hello, world!"
        >>> my_function()
    """

    @wraps(func)
    def wrapper(*args, **kwargs):  # works for bound methods and plain functions
        # Time to First Token (approximated here as the duration of the full
        # call, since the wrapped function returns all tokens at once)
        start_time = time.time()
        result = func(*args, **kwargs)
        first_token_time = time.time()
        # Generation Latency
        end_time = time.time()
        # Throughput (assuming the function returns a list of tokens)
        throughput = len(result) / (end_time - start_time)
        return f"""
        Time to First Token: {first_token_time - start_time}
        Generation Latency: {end_time - start_time}
        Throughput: {throughput}
        """

    return wrapper
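
A quick sketch of the decorator's observable behavior, using a hypothetical fake_run function as a stand-in for an LLM call; note that the wrapper replaces the wrapped function's return value with the formatted metrics report:

@metrics_decorator
def fake_run(task: str):
    # Hypothetical stand-in: one "token" per word of the task
    return task.split()

report = fake_run("an example prompt")
print(report)  # three lines: Time to First Token, Generation Latency, Throughput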

@@ -0,0 +1,63 @@
import time
from swarms.utils.llm_metrcs_decorator import metrics_decorator
def test_metrics_decorator():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return list(range(100))  # return a list of 100 tokens

    result = test_func()
    lines = result.strip().split("\n")
    # Check that the decorator returns 3 lines of output
    assert len(lines) == 3
    # Check that the Time to First Token is less than or equal to the Generation Latency
    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency
    # Check that the Throughput is approximately equal to the number of tokens
    # divided by the Generation Latency
    throughput = float(lines[2].split(": ")[1])
    assert (
        abs(throughput - 100 / generation_latency) < 0.01
    )  # allow for a small amount of error


def test_metrics_decorator_1_token():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return [0]  # return a list of 1 token

    result = test_func()
    lines = result.strip().split("\n")
    assert len(lines) == 3
    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency
    throughput = float(lines[2].split(": ")[1])
    assert abs(throughput - 1 / generation_latency) < 0.01
# Repeat the test with different numbers of tokens and different amounts of work
def _make_token_count_test(i):
    # Factory binds `i` at creation time, avoiding the late-binding pitfall of
    # defining closures directly inside the loop below
    def test_func():
        @metrics_decorator
        def work():
            time.sleep(0.01 * i)  # simulate some work
            return list(range(i))  # return a list of i tokens

        result = work()
        lines = result.strip().split("\n")
        assert len(lines) == 3
        time_to_first_token = float(lines[0].split(": ")[1])
        generation_latency = float(lines[1].split(": ")[1])
        assert time_to_first_token <= generation_latency
        throughput = float(lines[2].split(": ")[1])
        assert abs(throughput - i / generation_latency) < 0.01

    return test_func


for i in range(2, 17):
    globals()[f"test_metrics_decorator_{i}_tokens"] = _make_token_count_test(i)