You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
swarms/tests/utils/metrics_decorator.py

64 lines
2.2 KiB

import time
from swarms.utils.llm_metrcs_decorator import metrics_decorator
def test_metrics_decorator():
    """Check the metrics report produced for a function returning 100 tokens.

    The decorated call is expected to return text that, once stripped, has
    three lines of the form ``<label>: <number>``: time to first token,
    generation latency and throughput, in that order.
    """
    token_count = 100

    @metrics_decorator
    def produce_tokens():
        time.sleep(0.1)  # stand-in for real generation work
        return list(range(token_count))

    def metric_value(report_line):
        # Keep only the numeric part that follows the "<label>: " prefix.
        return float(report_line.split(": ")[1])

    report_lines = produce_tokens().strip().split("\n")

    # The report must consist of exactly three lines.
    assert len(report_lines) == 3

    # Time to first token must not exceed the generation latency.
    first_token_time = metric_value(report_lines[0])
    latency = metric_value(report_lines[1])
    assert first_token_time <= latency

    # Throughput should equal tokens / latency, within a small tolerance.
    measured_throughput = metric_value(report_lines[2])
    expected_throughput = token_count / latency
    assert abs(measured_throughput - expected_throughput) < 0.01
def test_metrics_decorator_1_token():
    """Check the metrics report when the decorated function returns one token.

    Same checks as the 100-token case: three ``<label>: <number>`` lines,
    time to first token no larger than the generation latency, and a
    throughput close to ``1 / latency``.
    """

    @metrics_decorator
    def produce_single_token():
        time.sleep(0.1)  # stand-in for real generation work
        return [0]  # exactly one token

    def metric_value(report_line):
        # Keep only the numeric part that follows the "<label>: " prefix.
        return float(report_line.split(": ")[1])

    report_lines = produce_single_token().strip().split("\n")
    assert len(report_lines) == 3

    first_token_time = metric_value(report_lines[0])
    latency = metric_value(report_lines[1])
    assert first_token_time <= latency

    measured_throughput = metric_value(report_lines[2])
    expected_throughput = 1 / latency
    assert abs(measured_throughput - expected_throughput) < 0.01
def _make_token_count_test(token_count):
    """Build a test that checks the metrics report for ``token_count`` tokens.

    A factory is used instead of defining the test directly in the loop
    body. A function defined in a loop looks its loop variable up when it
    is *called*, not when it is defined, so every generated test would
    otherwise run with the final loop value instead of its own.

    Args:
        token_count: Number of tokens the decorated function returns. The
            simulated work also scales with it (0.01 s per token).

    Returns:
        A zero-argument test function bound to ``token_count``.
    """

    def generated_test():
        @metrics_decorator
        def produce_tokens():
            time.sleep(0.01 * token_count)  # simulate some work
            return list(range(token_count))  # return token_count tokens

        result = produce_tokens()
        lines = result.strip().split("\n")
        # The report must consist of exactly three lines.
        assert len(lines) == 3
        # Time to first token must not exceed the generation latency.
        time_to_first_token = float(lines[0].split(": ")[1])
        generation_latency = float(lines[1].split(": ")[1])
        assert time_to_first_token <= generation_latency
        # Throughput should equal tokens / latency, within a small tolerance.
        throughput = float(lines[2].split(": ")[1])
        assert abs(throughput - token_count / generation_latency) < 0.01

    # Give each generated test a distinct name so tracebacks identify it.
    test_name = f"test_metrics_decorator_{token_count}_tokens"
    generated_test.__name__ = test_name
    generated_test.__qualname__ = test_name
    return generated_test


# Repeat the test with different numbers of tokens and different amounts of work.
# Each test is registered under its own module-level name so that pytest
# collects it as a separate test.
for i in range(2, 17):
    globals()[f"test_metrics_decorator_{i}_tokens"] = _make_token_count_test(i)