From 950907581c1a87a22d445402a896cbe7aaf43e6b Mon Sep 17 00:00:00 2001
From: Kye <kye@apacmediasolutions.com>
Date: Fri, 22 Dec 2023 13:03:56 -0500
Subject: [PATCH] [FEAT][llm_metrics_decorator]

---
 swarms/models/base_llm.py            | 54 ++++++++++++++++++++++--
 swarms/structs/__init__.py           |  7 +++-
 swarms/structs/conversation.py       | 14 ++++---
 swarms/utils/__init__.py             |  2 +
 swarms/utils/llm_metrcs_decorator.py | 39 +++++++++++++++++
 tests/utils/metrics_decorator.py     | 63 ++++++++++++++++++++++++++++
 6 files changed, 169 insertions(+), 10 deletions(-)
 create mode 100644 swarms/utils/llm_metrcs_decorator.py
 create mode 100644 tests/utils/metrics_decorator.py

diff --git a/swarms/models/base_llm.py b/swarms/models/base_llm.py
index 0409b867..09316a24 100644
--- a/swarms/models/base_llm.py
+++ b/swarms/models/base_llm.py
@@ -1,9 +1,11 @@
-import os
+import asyncio
 import logging
+import os
 import time
 from abc import ABC, abstractmethod
-from typing import Optional, List
-import asyncio
+from typing import List, Optional
+
+from swarms.utils.llm_metrcs_decorator import metrics_decorator
 
 
 def count_tokens(text: str) -> int:
@@ -118,6 +120,7 @@ class AbstractLLM(ABC):
         }
 
     @abstractmethod
+    @metrics_decorator  # NOTE(review): subclass overrides of run() are not wrapped by this
     def run(self, task: Optional[str] = None, *args, **kwargs) -> str:
         """generate text using language model"""
         pass
@@ -381,3 +384,48 @@ class AbstractLLM(ABC):
         TOKENS: {_num_tokens}
         Tokens/SEC: {_time_for_generation}
         """
+
+    def time_to_first_token(self, prompt: str) -> float:
+        """Time one `track_resource_utilization(prompt)` call.
+
+        NOTE(review): this is the full call duration, used as a stand-in
+        for time to first token; confirm this is the intended call.
+
+        Args:
+            prompt (str): text passed to `track_resource_utilization`
+
+        Returns:
+            float: elapsed seconds for the call
+        """
+        start_time = time.perf_counter()  # monotonic; immune to clock changes
+        self.track_resource_utilization(prompt)
+        return time.perf_counter() - start_time
+
+    def generation_latency(self, prompt: str) -> float:
+        """Measure how long a single `run(prompt)` call takes.
+
+        Args:
+            prompt (str): prompt forwarded to `self.run`
+
+        Returns:
+            float: elapsed seconds; the output of `run` is discarded
+        """
+        start_time = time.perf_counter()  # monotonic; immune to clock changes
+        self.run(prompt)
+        elapsed = time.perf_counter() - start_time
+        return elapsed
+
+    def throughput(self, prompts: List[str]) -> float:
+        """Run every prompt sequentially and report prompts per second.
+
+        Args:
+            prompts (List[str]): prompts passed one by one to `self.run`
+
+        Returns:
+            float: prompts/second; 0.0 if `prompts` is empty or no time elapsed
+        """
+        start_time = time.perf_counter()  # monotonic; immune to clock changes
+        for prompt in prompts:
+            self.run(prompt)
+        elapsed = time.perf_counter() - start_time
+        return len(prompts) / elapsed if prompts and elapsed > 0 else 0.0
diff --git a/swarms/structs/__init__.py b/swarms/structs/__init__.py
index b3cb4412..96d45220 100644
--- a/swarms/structs/__init__.py
+++ b/swarms/structs/__init__.py
@@ -3,4 +3,9 @@ from swarms.structs.sequential_workflow import SequentialWorkflow
 from swarms.structs.autoscaler import AutoScaler
 from swarms.structs.conversation import Conversation
 
-__all__ = ["Agent", "SequentialWorkflow", "AutoScaler", "Conversation"]
+__all__ = [  # names exported by `from swarms.structs import *`
+    "Agent",
+    "SequentialWorkflow",
+    "AutoScaler",
+    "Conversation",
+]
diff --git a/swarms/structs/conversation.py b/swarms/structs/conversation.py
index e20aace9..a5840488 100644
--- a/swarms/structs/conversation.py
+++ b/swarms/structs/conversation.py
@@ -9,22 +9,23 @@ from swarms.structs.base import BaseStructure
 class Conversation(BaseStructure):
     """
     Conversation class
-    
-    
+
+
     Attributes:
         time_enabled (bool): whether to enable time
         conversation_history (list): list of messages in the conversation
-        
-    
+
+
     Examples:
     >>> conv = Conversation()
     >>> conv.add("user", "Hello, world!")
     >>> conv.add("assistant", "Hello, user!")
     >>> conv.display_conversation()
     user: Hello, world!
-    
-    
+
+
     """
+
     def __init__(self, time_enabled: bool = False, *args, **kwargs):
         super().__init__()
         self.time_enabled = time_enabled
@@ -186,6 +187,7 @@ class Conversation(BaseStructure):
         # Load the conversation history from a JSON file
         with open(filename, "r") as f:
             self.conversation_history = json.load(f)
+
 
     def search_keyword_in_conversation(self, keyword: str):
         """Search for a keyword in the conversation history
diff --git a/swarms/utils/__init__.py b/swarms/utils/__init__.py
index 9ddbd324..ca149cc9 100644
--- a/swarms/utils/__init__.py
+++ b/swarms/utils/__init__.py
@@ -5,6 +5,7 @@ from swarms.utils.parse_code import (
 )
 from swarms.utils.pdf_to_text import pdf_to_text
 from swarms.utils.math_eval import math_eval
+from swarms.utils.llm_metrcs_decorator import metrics_decorator
 
 # from swarms.utils.phoenix_handler import phoenix_trace_decorator
 
@@ -15,4 +16,5 @@ __all__ = [
     "pdf_to_text",
     # "phoenix_trace_decorator",
     "math_eval",
+    "metrics_decorator",
 ]
diff --git a/swarms/utils/llm_metrcs_decorator.py b/swarms/utils/llm_metrcs_decorator.py
new file mode 100644
index 00000000..a915623a
--- /dev/null
+++ b/swarms/utils/llm_metrcs_decorator.py
@@ -0,0 +1,39 @@
+import time
+from functools import wraps
+from typing import Callable
+
+
+def metrics_decorator(func: Callable):
+    """Make ``func`` return a timing report (str) instead of its result.
+
+    Arguments are forwarded unchanged, so functions and methods both work.
+    The result of ``func`` is only measured with ``len()`` and then dropped.
+    A result without ``len()`` raises ``TypeError``.
+    Time to First Token equals the full latency (no streaming is observed).
+
+    Args:
+        func (Callable): The function to decorate
+
+    Example:
+    >>> @metrics_decorator
+    ... def my_function():
+    ...     return "Hello, world!"
+    >>> report = my_function()
+    """
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.perf_counter()  # monotonic, high-resolution clock
+        result = func(*args, **kwargs)
+        elapsed = time.perf_counter() - start_time
+
+        # An unmeasurably short call reports 0.0 instead of dividing by zero.
+        throughput = len(result) / elapsed if elapsed > 0 else 0.0
+
+        return f"""
+        Time to First Token: {elapsed}
+        Generation Latency: {elapsed}
+        Throughput: {throughput}
+        """
+
+    return wrapper
diff --git a/tests/utils/metrics_decorator.py b/tests/utils/metrics_decorator.py
new file mode 100644
index 00000000..84a06eec
--- /dev/null
+++ b/tests/utils/metrics_decorator.py
@@ -0,0 +1,63 @@
+import time
+from swarms.utils.llm_metrcs_decorator import metrics_decorator
+
+
+def test_metrics_decorator():
+    """A 100-token result yields a consistent three-line metrics report."""
+
+    @metrics_decorator
+    def test_func(self=None):
+        time.sleep(0.01)  # simulate some work so the latency is non-zero
+        return list(range(100))  # return a list of 100 tokens
+
+    # The wrapper forwards its first positional argument as `self`.
+    result = test_func(None)
+    lines = result.strip().split("\n")
+
+    # Check that the decorator returns 3 lines of output
+    assert len(lines) == 3
+
+    # Time to First Token can never exceed the Generation Latency
+    time_to_first_token = float(lines[0].split(": ")[1])
+    generation_latency = float(lines[1].split(": ")[1])
+    assert time_to_first_token <= generation_latency
+
+    # Throughput must equal the token count divided by the latency
+    assert abs(float(lines[2].split(": ")[1]) - 100 / generation_latency) < 0.01
+
+
+def test_metrics_decorator_1_token():
+    """A single-token result is reported with throughput 1 / latency."""
+
+    @metrics_decorator
+    def test_func(self=None):
+        time.sleep(0.01)  # simulate some work so the latency is non-zero
+        return [0]  # return a list of 1 token
+
+    lines = test_func(None).strip().split("\n")  # wrapper expects `self` first
+    assert len(lines) == 3
+    time_to_first_token = float(lines[0].split(": ")[1])
+    generation_latency = float(lines[1].split(": ")[1])
+    assert time_to_first_token <= generation_latency
+    assert abs(float(lines[2].split(": ")[1]) - 1 / generation_latency) < 0.01
+
+
+def _make_token_test(n):
+    """Build a test for an ``n``-token result, binding ``n`` at creation."""
+    def generated_test():
+        @metrics_decorator
+        def test_func(self=None):
+            time.sleep(0.001 * n)  # simulate some work
+            return list(range(n))  # return a list of n tokens
+        lines = test_func(None).strip().split("\n")  # wrapper expects `self`
+        assert len(lines) == 3
+        generation_latency = float(lines[1].split(": ")[1])
+        assert float(lines[0].split(": ")[1]) <= generation_latency
+        assert abs(float(lines[2].split(": ")[1]) - n / generation_latency) < 0.01
+
+    return generated_test
+
+
+# Repeat the test with different numbers of tokens and different amounts of work
+for i in range(2, 17):
+    globals()[f"test_metrics_decorator_{i}_tokens"] = _make_token_test(i)