From 950907581c1a87a22d445402a896cbe7aaf43e6b Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 22 Dec 2023 13:03:56 -0500 Subject: [PATCH] [FEAT][llm_metrics_decorator] --- swarms/models/base_llm.py | 54 ++++++++++++++++++++++-- swarms/structs/__init__.py | 7 +++- swarms/structs/conversation.py | 14 ++++--- swarms/utils/__init__.py | 2 + swarms/utils/llm_metrcs_decorator.py | 39 +++++++++++++++++ tests/utils/metrics_decorator.py | 63 ++++++++++++++++++++++++++++ 6 files changed, 169 insertions(+), 10 deletions(-) create mode 100644 swarms/utils/llm_metrcs_decorator.py create mode 100644 tests/utils/metrics_decorator.py diff --git a/swarms/models/base_llm.py b/swarms/models/base_llm.py index 0409b867..09316a24 100644 --- a/swarms/models/base_llm.py +++ b/swarms/models/base_llm.py @@ -1,9 +1,11 @@ -import os +import asyncio import logging +import os import time from abc import ABC, abstractmethod -from typing import Optional, List -import asyncio +from typing import List, Optional + +from swarms.utils.llm_metrcs_decorator import metrics_decorator def count_tokens(text: str) -> int: @@ -118,6 +120,7 @@ class AbstractLLM(ABC): } @abstractmethod + @metrics_decorator def run(self, task: Optional[str] = None, *args, **kwargs) -> str: """generate text using language model""" pass @@ -381,3 +384,48 @@ class AbstractLLM(ABC): TOKENS: {_num_tokens} Tokens/SEC: {_time_for_generation} """ + + def time_to_first_token(self, prompt: str) -> float: + """Time to first token + + Args: + prompt (str): _description_ + + Returns: + float: _description_ + """ + start_time = time.time() + tokens = self.track_resource_utilization( + prompt + ) # assuming `generate` is a method that generates tokens + first_token_time = time.time() + return first_token_time - start_time + + def generation_latency(self, prompt: str) -> float: + """generation latency + + Args: + prompt (str): _description_ + + Returns: + float: _description_ + """ + start_time = time.time() + tokens = self.run(prompt) + end_time = time.time() + return end_time - start_time + + def throughput(self, prompts: List[str]) -> float: + """throughput + + Args: + prompts (): _description_ + + Returns: + float: _description_ + """ + start_time = time.time() + for prompt in prompts: + tokens = self.run(prompt) + end_time = time.time() + return len(prompts) / (end_time - start_time) diff --git a/swarms/structs/__init__.py b/swarms/structs/__init__.py index b3cb4412..96d45220 100644 --- a/swarms/structs/__init__.py +++ b/swarms/structs/__init__.py @@ -3,4 +3,9 @@ from swarms.structs.sequential_workflow import SequentialWorkflow from swarms.structs.autoscaler import AutoScaler from swarms.structs.conversation import Conversation -__all__ = ["Agent", "SequentialWorkflow", "AutoScaler", "Conversation"] +__all__ = [ + "Agent", + "SequentialWorkflow", + "AutoScaler", + "Conversation", +] diff --git a/swarms/structs/conversation.py b/swarms/structs/conversation.py index e20aace9..a5840488 100644 --- a/swarms/structs/conversation.py +++ b/swarms/structs/conversation.py @@ -9,22 +9,23 @@ from swarms.structs.base import BaseStructure class Conversation(BaseStructure): """ Conversation class - - + + Attributes: time_enabled (bool): whether to enable time conversation_history (list): list of messages in the conversation - - + + Examples: >>> conv = Conversation() >>> conv.add("user", "Hello, world!") >>> conv.add("assistant", "Hello, user!") >>> conv.display_conversation() user: Hello, world! - - + + """ + def __init__(self, time_enabled: bool = False, *args, **kwargs): super().__init__() self.time_enabled = time_enabled @@ -186,6 +187,7 @@ class Conversation(BaseStructure): # Load the conversation history from a JSON file with open(filename, "r") as f: self.conversation_history = json.load(f) + def search_keyword_in_conversation(self, keyword: str): """Search for a keyword in the conversation history diff --git a/swarms/utils/__init__.py b/swarms/utils/__init__.py index 9ddbd324..ca149cc9 100644 --- a/swarms/utils/__init__.py +++ b/swarms/utils/__init__.py @@ -5,6 +5,7 @@ from swarms.utils.parse_code import ( ) from swarms.utils.pdf_to_text import pdf_to_text from swarms.utils.math_eval import math_eval +from swarms.utils.llm_metrcs_decorator import metrics_decorator # from swarms.utils.phoenix_handler import phoenix_trace_decorator @@ -15,4 +16,5 @@ __all__ = [ "pdf_to_text", # "phoenix_trace_decorator", "math_eval", + "metrics_decorator", ] diff --git a/swarms/utils/llm_metrcs_decorator.py b/swarms/utils/llm_metrcs_decorator.py new file mode 100644 index 00000000..a915623a --- /dev/null +++ b/swarms/utils/llm_metrcs_decorator.py @@ -0,0 +1,39 @@ +import time +from functools import wraps +from typing import Callable + + +def metrics_decorator(func: Callable): + """Metrics decorator for LLM + + Args: + func (Callable): The function to decorate + + Example: + >>> @metrics_decorator + >>> def my_function(): + >>> return "Hello, world!" + >>> my_function() + + """ + + @wraps(func) + def wrapper(self, *args, **kwargs): + # Time to First Token + start_time = time.time() + result = func(self, *args, **kwargs) + first_token_time = time.time() + + # Generation Latency + end_time = time.time() + + # Throughput (assuming the function returns a list of tokens) + throughput = len(result) / (end_time - start_time) + + return f""" + Time to First Token: {first_token_time - start_time} + Generation Latency: {end_time - start_time} + Throughput: {throughput} + """ + + return wrapper diff --git a/tests/utils/metrics_decorator.py b/tests/utils/metrics_decorator.py new file mode 100644 index 00000000..84a06eec --- /dev/null +++ b/tests/utils/metrics_decorator.py @@ -0,0 +1,63 @@ +import time +from swarms.utils.llm_metrcs_decorator import metrics_decorator + + +def test_metrics_decorator(): + @metrics_decorator + def test_func(): + time.sleep(0.1) # simulate some work + return list(range(100)) # return a list of 100 tokens + + result = test_func() + lines = result.strip().split("\n") + + # Check that the decorator returns 3 lines of output + assert len(lines) == 3 + + # Check that the Time to First Token is less than or equal to the Generation Latency + time_to_first_token = float(lines[0].split(": ")[1]) + generation_latency = float(lines[1].split(": ")[1]) + assert time_to_first_token <= generation_latency + + # Check that the Throughput is approximately equal to the number of tokens divided by the Generation Latency + throughput = float(lines[2].split(": ")[1]) + assert ( + abs(throughput - 100 / generation_latency) < 0.01 + ) # allow for a small amount of error + + +def test_metrics_decorator_1_token(): + @metrics_decorator + def test_func(): + time.sleep(0.1) # simulate some work + return [0] # return a list of 1 token + + result = test_func() + lines = result.strip().split("\n") + assert len(lines) == 3 + time_to_first_token = float(lines[0].split(": ")[1]) + generation_latency = float(lines[1].split(": ")[1]) + assert time_to_first_token <= generation_latency + throughput = float(lines[2].split(": ")[1]) + assert abs(throughput - 1 / generation_latency) < 0.01 + + +# Repeat the test with different numbers of tokens and different amounts of work +for i in range(2, 17): + + def test_func(): + @metrics_decorator + def test_func(): + time.sleep(0.01 * i) # simulate some work + return list(range(i)) # return a list of i tokens + + result = test_func() + lines = result.strip().split("\n") + assert len(lines) == 3 + time_to_first_token = float(lines[0].split(": ")[1]) + generation_latency = float(lines[1].split(": ")[1]) + assert time_to_first_token <= generation_latency + throughput = float(lines[2].split(": ")[1]) + assert abs(throughput - i / generation_latency) < 0.01 + + globals()[f"test_metrics_decorator_{i}_tokens"] = test_func