parent
							
								
									4557e1c3fb
								
							
						
					
					
						commit
						950907581c
					
				@ -0,0 +1,39 @@
 | 
				
			|||||||
 | 
					import time
 | 
				
			||||||
 | 
					from functools import wraps
 | 
				
			||||||
 | 
					from typing import Callable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def metrics_decorator(func: Callable):
 | 
				
			||||||
 | 
					    """Metrics decorator for LLM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Args:
 | 
				
			||||||
 | 
					        func (Callable): The function to decorate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Example:
 | 
				
			||||||
 | 
					    >>> @metrics_decorator
 | 
				
			||||||
 | 
					    >>> def my_function():
 | 
				
			||||||
 | 
					    >>>     return "Hello, world!"
 | 
				
			||||||
 | 
					    >>> my_function()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @wraps(func)
 | 
				
			||||||
 | 
					    def wrapper(self, *args, **kwargs):
 | 
				
			||||||
 | 
					        # Time to First Token
 | 
				
			||||||
 | 
					        start_time = time.time()
 | 
				
			||||||
 | 
					        result = func(self, *args, **kwargs)
 | 
				
			||||||
 | 
					        first_token_time = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Generation Latency
 | 
				
			||||||
 | 
					        end_time = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Throughput (assuming the function returns a list of tokens)
 | 
				
			||||||
 | 
					        throughput = len(result) / (end_time - start_time)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return f"""
 | 
				
			||||||
 | 
					        Time to First Token: {first_token_time - start_time}
 | 
				
			||||||
 | 
					        Generation Latency: {end_time - start_time}
 | 
				
			||||||
 | 
					        Throughput: {throughput}
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return wrapper
 | 
				
			||||||
@ -0,0 +1,63 @@
 | 
				
			|||||||
 | 
					import time
 | 
				
			||||||
 | 
					from swarms.utils.llm_metrcs_decorator import metrics_decorator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_metrics_decorator():
 | 
				
			||||||
 | 
					    @metrics_decorator
 | 
				
			||||||
 | 
					    def test_func():
 | 
				
			||||||
 | 
					        time.sleep(0.1)  # simulate some work
 | 
				
			||||||
 | 
					        return list(range(100))  # return a list of 100 tokens
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    result = test_func()
 | 
				
			||||||
 | 
					    lines = result.strip().split("\n")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Check that the decorator returns 3 lines of output
 | 
				
			||||||
 | 
					    assert len(lines) == 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Check that the Time to First Token is less than or equal to the Generation Latency
 | 
				
			||||||
 | 
					    time_to_first_token = float(lines[0].split(": ")[1])
 | 
				
			||||||
 | 
					    generation_latency = float(lines[1].split(": ")[1])
 | 
				
			||||||
 | 
					    assert time_to_first_token <= generation_latency
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Check that the Throughput is approximately equal to the number of tokens divided by the Generation Latency
 | 
				
			||||||
 | 
					    throughput = float(lines[2].split(": ")[1])
 | 
				
			||||||
 | 
					    assert (
 | 
				
			||||||
 | 
					        abs(throughput - 100 / generation_latency) < 0.01
 | 
				
			||||||
 | 
					    )  # allow for a small amount of error
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_metrics_decorator_1_token():
 | 
				
			||||||
 | 
					    @metrics_decorator
 | 
				
			||||||
 | 
					    def test_func():
 | 
				
			||||||
 | 
					        time.sleep(0.1)  # simulate some work
 | 
				
			||||||
 | 
					        return [0]  # return a list of 1 token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    result = test_func()
 | 
				
			||||||
 | 
					    lines = result.strip().split("\n")
 | 
				
			||||||
 | 
					    assert len(lines) == 3
 | 
				
			||||||
 | 
					    time_to_first_token = float(lines[0].split(": ")[1])
 | 
				
			||||||
 | 
					    generation_latency = float(lines[1].split(": ")[1])
 | 
				
			||||||
 | 
					    assert time_to_first_token <= generation_latency
 | 
				
			||||||
 | 
					    throughput = float(lines[2].split(": ")[1])
 | 
				
			||||||
 | 
					    assert abs(throughput - 1 / generation_latency) < 0.01
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Repeat the test with different numbers of tokens and different amounts of work
 | 
				
			||||||
 | 
					for i in range(2, 17):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_func():
 | 
				
			||||||
 | 
					        @metrics_decorator
 | 
				
			||||||
 | 
					        def test_func():
 | 
				
			||||||
 | 
					            time.sleep(0.01 * i)  # simulate some work
 | 
				
			||||||
 | 
					            return list(range(i))  # return a list of i tokens
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        result = test_func()
 | 
				
			||||||
 | 
					        lines = result.strip().split("\n")
 | 
				
			||||||
 | 
					        assert len(lines) == 3
 | 
				
			||||||
 | 
					        time_to_first_token = float(lines[0].split(": ")[1])
 | 
				
			||||||
 | 
					        generation_latency = float(lines[1].split(": ")[1])
 | 
				
			||||||
 | 
					        assert time_to_first_token <= generation_latency
 | 
				
			||||||
 | 
					        throughput = float(lines[2].split(": ")[1])
 | 
				
			||||||
 | 
					        assert abs(throughput - i / generation_latency) < 0.01
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    globals()[f"test_metrics_decorator_{i}_tokens"] = test_func
 | 
				
			||||||
					Loading…
					
					
				
		Reference in new issue