parent 8cdb82bd9d
commit 49ce4db646
@@ -0,0 +1,4 @@
#!/bin/bash

# Find all __pycache__ directories and delete them
find . -type d -name "__pycache__" -exec rm -rf {} +
@@ -1,83 +0,0 @@
import os
from concurrent.futures import ThreadPoolExecutor
from unittest.mock import patch

import pytest
from dotenv import load_dotenv

from swarms.models.autotemp import AutoTempAgent

api_key = os.getenv("OPENAI_API_KEY")

load_dotenv()


@pytest.fixture
def auto_temp_agent():
    return AutoTempAgent(api_key=api_key)


def test_initialization(auto_temp_agent):
    assert isinstance(auto_temp_agent, AutoTempAgent)
    assert auto_temp_agent.auto_select is True
    assert auto_temp_agent.max_workers == 6
    assert auto_temp_agent.temperature == 0.5
    assert auto_temp_agent.alt_temps == [0.4, 0.6, 0.8, 1.0, 1.2, 1.4]


def test_evaluate_output(auto_temp_agent):
    output = "This is a test output."
    with patch("swarms.models.OpenAIChat") as MockOpenAIChat:
        mock_instance = MockOpenAIChat.return_value
        mock_instance.return_value = "Score: 95.5"
        score = auto_temp_agent.evaluate_output(output)
        assert score == 95.5
        mock_instance.assert_called_once()


def test_run_auto_select(auto_temp_agent):
    task = "Generate a blog post."
    temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4"
    result = auto_temp_agent.run(task, temperature_string)
    assert "Best AutoTemp Output" in result
    assert "Temp" in result
    assert "Score" in result


def test_run_no_scores(auto_temp_agent):
    task = "Invalid task."
    temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4"
    with ThreadPoolExecutor(
        max_workers=auto_temp_agent.max_workers
    ) as executor:
        with patch.object(
            executor,
            "submit",
            side_effect=[None, None, None, None, None, None],
        ):
            result = auto_temp_agent.run(task, temperature_string)
            assert result == "No valid outputs generated."


def test_run_manual_select(auto_temp_agent):
    auto_temp_agent.auto_select = False
    task = "Generate a blog post."
    temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4"
    result = auto_temp_agent.run(task, temperature_string)
    assert "Best AutoTemp Output" not in result
    assert "Temp" in result
    assert "Score" in result


def test_failed_initialization():
    with pytest.raises(Exception):
        AutoTempAgent()


def test_failed_evaluate_output(auto_temp_agent):
    output = "This is a test output."
    with patch("swarms.models.OpenAIChat") as MockOpenAIChat:
        mock_instance = MockOpenAIChat.return_value
        mock_instance.return_value = "Invalid score text"
        score = auto_temp_agent.evaluate_output(output)
        assert score == 0.0
@@ -1,63 +0,0 @@
import time
from swarms.utils.llm_metrics_decorator import metrics_decorator


def test_metrics_decorator():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return list(range(100))  # return a list of 100 tokens

    result = test_func()
    lines = result.strip().split("\n")

    # Check that the decorator returns 3 lines of output
    assert len(lines) == 3

    # Check that the Time to First Token is less than or equal to the Generation Latency
    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency

    # Check that the Throughput is approximately equal to the number of tokens divided by the Generation Latency
    throughput = float(lines[2].split(": ")[1])
    assert (
        abs(throughput - 100 / generation_latency) < 0.01
    )  # allow for a small amount of error


def test_metrics_decorator_1_token():
    @metrics_decorator
    def test_func():
        time.sleep(0.1)  # simulate some work
        return [0]  # return a list of 1 token

    result = test_func()
    lines = result.strip().split("\n")
    assert len(lines) == 3
    time_to_first_token = float(lines[0].split(": ")[1])
    generation_latency = float(lines[1].split(": ")[1])
    assert time_to_first_token <= generation_latency
    throughput = float(lines[2].split(": ")[1])
    assert abs(throughput - 1 / generation_latency) < 0.01


# Repeat the test with different numbers of tokens and different amounts of work
for i in range(2, 17):

    def test_func():
        @metrics_decorator
        def test_func():
            time.sleep(0.01 * i)  # simulate some work
            return list(range(i))  # return a list of i tokens

        result = test_func()
        lines = result.strip().split("\n")
        assert len(lines) == 3
        time_to_first_token = float(lines[0].split(": ")[1])
        generation_latency = float(lines[1].split(": ")[1])
        assert time_to_first_token <= generation_latency
        throughput = float(lines[2].split(": ")[1])
        assert abs(throughput - i / generation_latency) < 0.01

    globals()[f"test_metrics_decorator_{i}_tokens"] = test_func
@@ -1,150 +0,0 @@
# Import necessary modules and functions for testing
import subprocess
import sys

import pytest

# Try importing phoenix and handle exceptions
try:
    import phoenix as px
except Exception as error:
    print(f"Error importing phoenix: {error}")
    print("Please install phoenix: pip install phoenix")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "arize-mlflow"]
    )

# Import the code to be tested
from swarms.utils.phoenix_handler import phoenix_trace_decorator


# Define a fixture for Phoenix session
@pytest.fixture(scope="function")
def phoenix_session():
    session = px.active_session() or px.launch_app()
    yield session
    session.stop()


# Define test cases for the phoenix_trace_decorator function
def test_phoenix_trace_decorator_documentation():
    """Test if phoenix_trace_decorator has a docstring."""
    assert phoenix_trace_decorator.__doc__ is not None


def test_phoenix_trace_decorator_functionality(
    capsys, phoenix_session
):
    """Test the functionality of phoenix_trace_decorator."""

    # Define a function to be decorated
    @phoenix_trace_decorator("This is a test function.")
    def test_function():
        print("Hello, Phoenix!")

    # Execute the decorated function
    test_function()

    # Capture the printed output
    captured = capsys.readouterr()
    assert captured.out == "Hello, Phoenix!\n"


def test_phoenix_trace_decorator_exception_handling(phoenix_session):
    """Test if phoenix_trace_decorator handles exceptions correctly."""

    # Define a function that raises an exception
    @phoenix_trace_decorator("This function raises an exception.")
    def exception_function():
        raise ValueError("An error occurred.")

    # Execute the decorated function
    with pytest.raises(ValueError):
        exception_function()

    # Check if the exception was traced by Phoenix
    traces = phoenix_session.get_traces()
    assert len(traces) == 1
    assert traces[0].get("error") is not None
    assert traces[0].get("error_info") is not None


# Define test cases for phoenix_trace_decorator
def test_phoenix_trace_decorator_docstring():
    """Test if phoenix_trace_decorator's inner function has a docstring."""

    @phoenix_trace_decorator("This is a test function.")
    def test_function():
        """Test function docstring."""
        pass

    assert test_function.__doc__ is not None


def test_phoenix_trace_decorator_functionality_with_params(
    capsys, phoenix_session
):
    """Test the functionality of phoenix_trace_decorator with parameters."""

    # Define a function with parameters to be decorated
    @phoenix_trace_decorator("This function takes parameters.")
    def param_function(a, b):
        result = a + b
        print(f"Result: {result}")

    # Execute the decorated function with parameters
    param_function(2, 3)

    # Capture the printed output
    captured = capsys.readouterr()
    assert captured.out == "Result: 5\n"


def test_phoenix_trace_decorator_nested_calls(
    capsys, phoenix_session
):
    """Test nested calls of phoenix_trace_decorator."""

    # Define a nested function with decorators
    @phoenix_trace_decorator("Outer function")
    def outer_function():
        print("Outer function")

        @phoenix_trace_decorator("Inner function")
        def inner_function():
            print("Inner function")

        inner_function()

    # Execute the decorated functions
    outer_function()

    # Capture the printed output
    captured = capsys.readouterr()
    assert "Outer function" in captured.out
    assert "Inner function" in captured.out


def test_phoenix_trace_decorator_nested_exception_handling(
    phoenix_session,
):
    """Test exception handling with nested phoenix_trace_decorators."""

    # Define a function with nested decorators and an exception
    @phoenix_trace_decorator("Outer function")
    def outer_function():
        @phoenix_trace_decorator("Inner function")
        def inner_function():
            raise ValueError("Inner error")

        inner_function()

    # Execute the decorated functions
    with pytest.raises(ValueError):
        outer_function()

    # Check if both exceptions were traced by Phoenix
    traces = phoenix_session.get_traces()
    assert len(traces) == 2
    assert "Outer function" in traces[0].get("error_info")
    assert "Inner function" in traces[1].get("error_info")