parent 8cdb82bd9d
commit 49ce4db646
@@ -0,0 +1,4 @@
#!/bin/bash

# Find all __pycache__ directories and delete them
find . -type d -name "__pycache__" -exec rm -rf {} +
@@ -1,83 +0,0 @@
import os
from concurrent.futures import ThreadPoolExecutor
from unittest.mock import patch

import pytest
from dotenv import load_dotenv

from swarms.models.autotemp import AutoTempAgent

load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")


@pytest.fixture
def auto_temp_agent():
    return AutoTempAgent(api_key=api_key)


def test_initialization(auto_temp_agent):
    assert isinstance(auto_temp_agent, AutoTempAgent)
    assert auto_temp_agent.auto_select is True
    assert auto_temp_agent.max_workers == 6
    assert auto_temp_agent.temperature == 0.5
    assert auto_temp_agent.alt_temps == [0.4, 0.6, 0.8, 1.0, 1.2, 1.4]


def test_evaluate_output(auto_temp_agent):
    output = "This is a test output."
    with patch("swarms.models.OpenAIChat") as MockOpenAIChat:
        mock_instance = MockOpenAIChat.return_value
        mock_instance.return_value = "Score: 95.5"
        score = auto_temp_agent.evaluate_output(output)
        assert score == 95.5
        mock_instance.assert_called_once()


def test_run_auto_select(auto_temp_agent):
    task = "Generate a blog post."
    temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4"
    result = auto_temp_agent.run(task, temperature_string)
    assert "Best AutoTemp Output" in result
    assert "Temp" in result
    assert "Score" in result


def test_run_no_scores(auto_temp_agent):
    task = "Invalid task."
    temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4"
    with ThreadPoolExecutor(
        max_workers=auto_temp_agent.max_workers
    ) as executor:
        with patch.object(
            executor,
            "submit",
            side_effect=[None, None, None, None, None, None],
        ):
            result = auto_temp_agent.run(task, temperature_string)
            assert result == "No valid outputs generated."


def test_run_manual_select(auto_temp_agent):
    auto_temp_agent.auto_select = False
    task = "Generate a blog post."
    temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4"
    result = auto_temp_agent.run(task, temperature_string)
    assert "Best AutoTemp Output" not in result
    assert "Temp" in result
    assert "Score" in result


def test_failed_initialization():
    with pytest.raises(Exception):
        AutoTempAgent()


def test_failed_evaluate_output(auto_temp_agent):
    output = "This is a test output."
    with patch("swarms.models.OpenAIChat") as MockOpenAIChat:
        mock_instance = MockOpenAIChat.return_value
        mock_instance.return_value = "Invalid score text"
        score = auto_temp_agent.evaluate_output(output)
        assert score == 0.0
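
For context on what these deleted tests expected, here is a minimal, hypothetical sketch of the score-parsing behaviour they assume from AutoTempAgent.evaluate_output. The name evaluate_output_sketch and the regex are illustrative only, not the swarms implementation, which may differ.

import re


def evaluate_output_sketch(llm_response: str) -> float:
    # Pull a number out of text like "Score: 95.5"; fall back to 0.0 when no
    # score is present, matching the 95.5 and 0.0 assertions in the tests above.
    match = re.search(r"Score:\s*([\d.]+)", llm_response)
    return float(match.group(1)) if match else 0.0


assert evaluate_output_sketch("Score: 95.5") == 95.5
assert evaluate_output_sketch("Invalid score text") == 0.0
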
@@ -1,63 +0,0 @@
import time
from swarms.utils.llm_metrics_decorator import metrics_decorator


def test_metrics_decorator():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return list(range(100))  # return a list of 100 tokens

    result = test_func()
    lines = result.strip().split("\n")

    # Check that the decorator returns 3 lines of output
    assert len(lines) == 3

    # Check that the Time to First Token is less than or equal to the Generation Latency
    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency

    # Check that the Throughput is approximately equal to the number of tokens divided by the Generation Latency
    throughput = float(lines[2].split(": ")[1])
    assert (
        abs(throughput - 100 / generation_latency) < 0.01
    )  # allow for a small amount of error


def test_metrics_decorator_1_token():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return [0]  # return a list of 1 token

    result = test_func()
    lines = result.strip().split("\n")
    assert len(lines) == 3
    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency
    throughput = float(lines[2].split(": ")[1])
    assert abs(throughput - 1 / generation_latency) < 0.01


# Repeat the test with different numbers of tokens and different amounts of work
for i in range(2, 17):

    def test_func(i=i):  # bind the current i; a plain closure would run every generated test with i == 16
        @metrics_decorator
        def test_func():
            time.sleep(0.01 * i)  # simulate some work
            return list(range(i))  # return a list of i tokens

        result = test_func()
        lines = result.strip().split("\n")
        assert len(lines) == 3
        time_to_first_token = float(lines[0].split(": ")[1])
        generation_latency = float(lines[1].split(": ")[1])
        assert time_to_first_token <= generation_latency
        throughput = float(lines[2].split(": ")[1])
        assert abs(throughput - i / generation_latency) < 0.01

    globals()[f"test_metrics_decorator_{i}_tokens"] = test_func
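
These deleted tests parse a three-line report of the form "Time to First Token: …", "Generation Latency: …", "Throughput: …". The following is a minimal, hypothetical sketch of a decorator that would satisfy them; it is not the actual swarms.utils.llm_metrics_decorator implementation, and the timing model (treating the moment the wrapped call returns as the first-token time) is an assumption.

import time
from functools import wraps


def metrics_decorator_sketch(func):
    # Hypothetical stand-in for swarms' metrics_decorator: time the wrapped call,
    # treat its return value as the list of generated tokens, and report the
    # three metrics the tests above split on ": ".
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        tokens = func(*args, **kwargs)
        elapsed = time.time() - start
        throughput = len(tokens) / elapsed if elapsed else 0.0
        return (
            f"Time to First Token: {elapsed}\n"
            f"Generation Latency: {elapsed}\n"
            f"Throughput: {throughput}"
        )

    return wrapper
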
@@ -1,150 +0,0 @@
# Import necessary modules and functions for testing
import subprocess
import sys

import pytest

# Try importing phoenix and handle exceptions
try:
    import phoenix as px
except Exception as error:
    print(f"Error importing phoenix: {error}")
    print("Please install phoenix: pip install phoenix")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "arize-mlflow"]
    )

# Import the code to be tested
from swarms.utils.phoenix_handler import phoenix_trace_decorator


# Define a fixture for Phoenix session
@pytest.fixture(scope="function")
def phoenix_session():
    session = px.active_session() or px.launch_app()
    yield session
    session.stop()


# Define test cases for the phoenix_trace_decorator function
def test_phoenix_trace_decorator_documentation():
    """Test if phoenix_trace_decorator has a docstring."""
    assert phoenix_trace_decorator.__doc__ is not None


def test_phoenix_trace_decorator_functionality(
    capsys, phoenix_session
):
    """Test the functionality of phoenix_trace_decorator."""

    # Define a function to be decorated
    @phoenix_trace_decorator("This is a test function.")
    def test_function():
        print("Hello, Phoenix!")

    # Execute the decorated function
    test_function()

    # Capture the printed output
    captured = capsys.readouterr()
    assert captured.out == "Hello, Phoenix!\n"


def test_phoenix_trace_decorator_exception_handling(phoenix_session):
    """Test if phoenix_trace_decorator handles exceptions correctly."""

    # Define a function that raises an exception
    @phoenix_trace_decorator("This function raises an exception.")
    def exception_function():
        raise ValueError("An error occurred.")

    # Execute the decorated function
    with pytest.raises(ValueError):
        exception_function()

    # Check if the exception was traced by Phoenix
    traces = phoenix_session.get_traces()
    assert len(traces) == 1
    assert traces[0].get("error") is not None
    assert traces[0].get("error_info") is not None


# Define test cases for phoenix_trace_decorator
def test_phoenix_trace_decorator_docstring():
    """Test if phoenix_trace_decorator's inner function has a docstring."""

    @phoenix_trace_decorator("This is a test function.")
    def test_function():
        """Test function docstring."""
        pass

    assert test_function.__doc__ is not None


def test_phoenix_trace_decorator_functionality_with_params(
    capsys, phoenix_session
):
    """Test the functionality of phoenix_trace_decorator with parameters."""

    # Define a function with parameters to be decorated
    @phoenix_trace_decorator("This function takes parameters.")
    def param_function(a, b):
        result = a + b
        print(f"Result: {result}")

    # Execute the decorated function with parameters
    param_function(2, 3)

    # Capture the printed output
    captured = capsys.readouterr()
    assert captured.out == "Result: 5\n"


def test_phoenix_trace_decorator_nested_calls(
    capsys, phoenix_session
):
    """Test nested calls of phoenix_trace_decorator."""

    # Define a nested function with decorators
    @phoenix_trace_decorator("Outer function")
    def outer_function():
        print("Outer function")

        @phoenix_trace_decorator("Inner function")
        def inner_function():
            print("Inner function")

        inner_function()

    # Execute the decorated functions
    outer_function()

    # Capture the printed output
    captured = capsys.readouterr()
    assert "Outer function" in captured.out
    assert "Inner function" in captured.out


def test_phoenix_trace_decorator_nested_exception_handling(
    phoenix_session,
):
    """Test exception handling with nested phoenix_trace_decorators."""

    # Define a function with nested decorators and an exception
    @phoenix_trace_decorator("Outer function")
    def outer_function():
        @phoenix_trace_decorator("Inner function")
        def inner_function():
            raise ValueError("Inner error")

        inner_function()

    # Execute the decorated functions
    with pytest.raises(ValueError):
        outer_function()

    # Check if both exceptions were traced by Phoenix
    traces = phoenix_session.get_traces()
    assert len(traces) == 2
    assert "Outer function" in traces[0].get("error_info")
    assert "Inner function" in traces[1].get("error_info")