parent
8dfb1d33d0
commit
fea0eeebc9
@ -0,0 +1,3 @@
|
||||
"""
|
||||
|
||||
"""
|
@ -0,0 +1,68 @@
|
||||
# test_embeddings.py
|
||||
|
||||
import pytest
|
||||
import openai
|
||||
from unittest.mock import patch
|
||||
from swarms.models.simple_ada import get_ada_embeddings # Adjust this import path to your project structure
|
||||
from os import getenv
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Fixture for test texts
|
||||
@pytest.fixture
def test_texts():
    """Provide the sample strings used by the embedding tests."""
    samples = (
        "Hello World",
        "This is a test string with newline\ncharacters",
        "A quick brown fox jumps over the lazy dog",
    )
    return list(samples)
|
||||
|
||||
# Basic Test
|
||||
def test_get_ada_embeddings_basic(test_texts):
    """Check each sample text yields the mocked embedding and the expected API call."""
    # Canned payload standing in for the OpenAI API response
    fake_response = {"data": [{"embedding": [0.1, 0.2, 0.3]}]}
    with patch('openai.Embedding.create', return_value=fake_response) as mock_create:
        for sample in test_texts:
            result = get_ada_embeddings(sample)
            assert result == [0.1, 0.2, 0.3], "Embedding does not match expected output"
            # The call is expected to receive the text with newlines turned into spaces
            flattened = sample.replace("\n", " ")
            mock_create.assert_called_with(input=[flattened], model="text-embedding-ada-002")
|
||||
|
||||
# Parameterized Test
|
||||
@pytest.mark.parametrize(
    "text, model, expected_call_model",
    [
        ("Hello World", "text-embedding-ada-002", "text-embedding-ada-002"),
        ("Hello World", "text-embedding-ada-001", "text-embedding-ada-001"),
    ],
)
def test_get_ada_embeddings_models(text, model, expected_call_model):
    """Check that the requested model name is forwarded to the mocked API call."""
    canned = {"data": [{"embedding": [0.1, 0.2, 0.3]}]}
    with patch('openai.Embedding.create', return_value=canned) as mock_create:
        # The returned embedding is irrelevant here; only the call arguments matter
        get_ada_embeddings(text, model=model)
        mock_create.assert_called_with(input=[text], model=expected_call_model)
|
||||
|
||||
# Exception Test
|
||||
def test_get_ada_embeddings_exception():
    """Check that an OpenAIError raised by the mocked API call reaches the caller."""
    with patch('openai.Embedding.create') as mock_create:
        api_failure = openai.error.OpenAIError("Test error")
        mock_create.side_effect = api_failure
        with pytest.raises(openai.error.OpenAIError):
            get_ada_embeddings("Some text")
|
||||
|
||||
# Tests for environment variable loading
|
||||
def test_env_var_loading(monkeypatch):
    """Check that a key set through monkeypatch is visible via getenv."""
    monkeypatch.setenv("OPENAI_API_KEY", "testkey123")
    with patch('openai.Embedding.create'):
        observed_key = getenv("OPENAI_API_KEY")
        assert observed_key == "testkey123", "Environment variable for API key is not set correctly"
|
||||
|
||||
# ... more tests to cover other aspects such as different input types, large inputs, invalid inputs, etc.
|
@ -1,58 +1,238 @@
|
||||
import pytest
|
||||
import torch
|
||||
from unittest.mock import Mock, patch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
||||
from swarms.models.huggingface import HuggingfaceLLM
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from swarms.models.huggingface import HuggingfaceLLM # Replace with the actual import path
|
||||
|
||||
|
||||
# Fixture for the class instance
|
||||
@pytest.fixture
def llm_instance():
    """Create a HuggingfaceLLM instance for testing.

    Returns:
        A HuggingfaceLLM constructed with model_id "gpt2-small".
    """
    model_id = "gpt2-small"
    return HuggingfaceLLM(model_id=model_id)


@pytest.fixture
def huggingface_llm(llm_instance):
    """Expose the same instance under the `huggingface_llm` fixture name.

    Tests in this module request the instance under both names, so both
    fixtures are defined and resolve to the same object within a test.
    """
    return llm_instance
|
||||
|
||||
|
||||
def test_initialization(huggingface_llm):
    """Check the attribute values of a freshly constructed HuggingfaceLLM."""
    assert huggingface_llm.model_id == "gpt2-small"
    assert huggingface_llm.device in ["cpu", "cuda"]
    # NOTE(review): other tests in this module assert a default max_length of
    # 500 -- confirm which default the class actually uses.
    assert huggingface_llm.max_length == 20
    # Flags are compared by identity so a merely falsy value (0, None, "")
    # does not satisfy the check.
    assert huggingface_llm.verbose is False
    assert huggingface_llm.distributed is False
    assert huggingface_llm.decoding is False
    # Model and tokenizer are expected to be unset right after construction.
    assert huggingface_llm.model is None
    assert huggingface_llm.tokenizer is None
|
||||
|
||||
|
||||
def test_load_model(huggingface_llm):
    """Check that load_model populates the model and tokenizer attributes."""
    # The Auto* classes are factories: from_pretrained() returns a concrete
    # architecture class, never an instance of AutoModelForCausalLM or
    # AutoTokenizer, so the checks use the common base classes instead.
    from transformers import PreTrainedModel, PreTrainedTokenizerBase

    huggingface_llm.load_model()
    assert isinstance(huggingface_llm.model, PreTrainedModel)
    assert isinstance(huggingface_llm.tokenizer, PreTrainedTokenizerBase)
|
||||
|
||||
|
||||
def test_run(huggingface_llm):
    """Check that run() returns a non-empty string for a short prompt."""
    output = huggingface_llm.run("Once upon a time")
    assert isinstance(output, str)
    assert len(output) > 0
|
||||
|
||||
|
||||
def test_call_method(huggingface_llm):
    """Check that calling the instance directly returns a non-empty string."""
    output = huggingface_llm("Once upon a time")
    assert isinstance(output, str)
    assert len(output) > 0
|
||||
|
||||
|
||||
def test_load_model_failure():
    """Check that a failure while loading the pretrained model is surfaced."""
    # Patch the name where the module under test looks it up, matching the
    # patch targets used by the other tests in this file.
    with patch(
        "swarms.models.huggingface.AutoModelForCausalLM.from_pretrained",
        side_effect=Exception("Model load failed"),
    ):
        # Construction and the explicit load both sit inside the raises block
        # so the test holds whichever of the two triggers the load.
        with pytest.raises(Exception):
            llm = HuggingfaceLLM(model_id="gpt2-small")
            llm.load_model()
|
||||
|
||||
|
||||
# Test for instantiation and attributes
|
||||
def test_llm_initialization(llm_instance):
    """Check the attributes expected on a freshly built instance."""
    expected_attributes = {"model_id": "gpt2-small", "max_length": 500}
    for attribute, expected in expected_attributes.items():
        assert getattr(llm_instance, attribute) == expected
    # ... add more assertions for all default attributes
|
||||
|
||||
|
||||
# Parameterized test for setting devices
|
||||
@pytest.mark.parametrize("device", ["cpu", "cuda"])
def test_llm_set_device(llm_instance, device):
    """Check that set_device stores the requested device on the instance."""
    llm_instance.set_device(device)
    active_device = llm_instance.device
    assert active_device == device
|
||||
|
||||
|
||||
# Test exception during initialization with a bad model_id
|
||||
def test_llm_bad_model_initialization():
    """Check that constructing with an unknown model id raises."""
    # Only the statement under test sits inside the raises block, so an
    # unrelated earlier failure cannot satisfy the expectation.
    with pytest.raises(Exception):
        HuggingfaceLLM(model_id="unknown-model")
|
||||
|
||||
|
||||
# Mocking the tokenizer and model to test run method
|
||||
@patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
@patch("swarms.models.huggingface.AutoModelForCausalLM.from_pretrained")
def test_llm_run(mock_model, mock_tokenizer, llm_instance):
    """Check run() against a mocked model and tokenizer."""
    fake_model = mock_model.return_value
    fake_model.generate.return_value = "mocked output"
    fake_tokenizer = mock_tokenizer.return_value
    fake_tokenizer.encode.return_value = "mocked input"
    output = llm_instance.run("test task")
    assert output == "mocked output"
|
||||
|
||||
|
||||
# Async test (requires pytest-asyncio plugin)
|
||||
@pytest.mark.asyncio
async def test_llm_run_async(llm_instance):
    """Check that awaiting run_async yields a string."""
    pending = llm_instance.run_async("test task")
    output = await pending
    assert isinstance(output, str)
|
||||
|
||||
|
||||
# Test for checking GPU availability
|
||||
def test_llm_gpu_availability(llm_instance):
    """Check that gpu_available() agrees with torch.cuda.is_available()."""
    # torch's own answer on the current machine is the reference value
    cuda_present = torch.cuda.is_available()
    reported = llm_instance.gpu_available()
    assert reported == cuda_present
|
||||
|
||||
|
||||
# Test for memory consumption reporting
|
||||
def test_llm_memory_consumption(llm_instance):
    """Check the memory report against fixed, mocked torch.cuda figures."""
    # Both torch.cuda queries are pinned so the expected report is constant
    allocated_patch = patch("torch.cuda.memory_allocated", return_value=1024)
    reserved_patch = patch("torch.cuda.memory_reserved", return_value=2048)
    with allocated_patch, reserved_patch:
        report = llm_instance.memory_consumption()
    assert report == {"allocated": 1024, "reserved": 2048}
|
||||
|
||||
|
||||
# Test different initialization parameters
|
||||
@pytest.mark.parametrize("model_id, max_length", [
    ("gpt2-small", 100),
    ("gpt2-medium", 200),
    ("gpt2-large", None)  # None to check default behavior
])
def test_llm_initialization_params(model_id, max_length):
    """Check max_length for both explicit and omitted constructor values."""
    if not max_length:
        # No explicit value: the test assumes 500 is the default max_length
        default_instance = HuggingfaceLLM(model_id=model_id)
        assert default_instance.max_length == 500
        return
    configured = HuggingfaceLLM(model_id=model_id, max_length=max_length)
    assert configured.max_length == max_length
|
||||
|
||||
|
||||
# Test for setting an invalid device
|
||||
def test_llm_set_invalid_device(llm_instance):
    """Check that an unrecognised device name is rejected with ValueError."""
    bogus_device = "quantum_processor"
    with pytest.raises(ValueError):
        llm_instance.set_device(bogus_device)
|
||||
|
||||
|
||||
# Test for model download progress bar
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
def test_llm_model_download_progress(mock_download, llm_instance):
    """Check that download_model_with_progress calls _download_model exactly once."""
    llm_instance.download_model_with_progress()
    # The patched downloader records the call instead of doing a real download
    mock_download.assert_called_once()
|
||||
|
||||
|
||||
# Mocking external API call to test run method without network
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_without_network(mock_run, llm_instance):
    """Check that a patched run() hands back its canned value."""
    mock_run.return_value = "mocked output"
    output = llm_instance.run("test task without network")
    assert output == "mocked output"
|
||||
|
||||
|
||||
# Test handling of empty input for the run method
|
||||
def test_llm_run_empty_input(llm_instance):
    """Check that run() rejects an empty prompt with ValueError."""
    empty_prompt = ""
    with pytest.raises(ValueError):
        llm_instance.run(empty_prompt)
|
||||
|
||||
|
||||
# Test the generation with a provided seed for reproducibility
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_with_seed(mock_run, llm_instance):
    """Check a seeded call to the patched run() returns the canned value."""
    fixed_seed = 42
    llm_instance.set_seed(fixed_seed)
    # run() is mocked here, so this only checks that the seeded call goes
    # through; reproducibility of real generation is not exercised.
    mock_run.return_value = "mocked deterministic output"
    output = llm_instance.run("test task", seed=fixed_seed)
    assert output == "mocked deterministic output"
|
||||
|
||||
|
||||
# Test the output length is as expected
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_output_length(mock_run, llm_instance):
    """Check the word count of the output against max_length."""
    llm_instance.max_length = 50  # cap used for the comparison below
    mock_run.return_value = "mocked output" * 10  # some long text
    output = llm_instance.run("test task")
    word_count = len(output.split())
    assert word_count <= llm_instance.max_length
|
||||
|
||||
|
||||
# Test the tokenizer handling special tokens correctly
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.encode")
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.decode")
def test_llm_tokenizer_special_tokens(mock_decode, mock_encode, llm_instance):
    """Check run() uses the patched encode/decode once each and keeps the text."""
    mock_encode.return_value = "encoded input with special tokens"
    mock_decode.return_value = "decoded output with special tokens"
    output = llm_instance.run("test task with special tokens")
    # Each tokenizer direction is expected to be used exactly once
    for tokenizer_call in (mock_encode, mock_decode):
        tokenizer_call.assert_called_once()
    assert "special tokens" in output
|
||||
|
||||
|
||||
# Test for correct handling of timeouts
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_timeout_handling(mock_run, llm_instance):
    """Check that a TimeoutError raised by the patched run() reaches the caller."""
    mock_run.side_effect = TimeoutError
    prompt = "test task with timeout"
    with pytest.raises(TimeoutError):
        llm_instance.run(prompt)
|
||||
|
||||
|
||||
# Test for response time within a threshold (performance test)
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_response_time(mock_run, llm_instance):
    """Check that a call to the patched run() completes in under one second."""
    import time

    mock_run.return_value = "mocked output"
    # perf_counter() is monotonic, so a system clock adjustment during the
    # test cannot distort the measured interval the way time.time() could.
    start_time = time.perf_counter()
    llm_instance.run("test task for response time")
    elapsed = time.perf_counter() - start_time
    assert elapsed < 1  # Assuming the response should be faster than 1 second
|
||||
|
||||
|
||||
# Test the logging of a warning for long inputs
|
||||
@patch("swarms.models.huggingface.logging.warning")
def test_llm_long_input_warning(mock_warning, llm_instance):
    """Check that a very long prompt causes exactly one logged warning."""
    oversized_prompt = "x" * 10_000  # input longer than the typical limit
    llm_instance.run(oversized_prompt)
    mock_warning.assert_called_once()
|
||||
|
||||
|
||||
# Test for run method behavior when model raises an exception
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM._model.generate", side_effect=RuntimeError)
def test_llm_run_model_exception(mock_generate, llm_instance):
    """Check that a RuntimeError from the patched generate escapes run()."""
    failing_prompt = "test task when model fails"
    with pytest.raises(RuntimeError):
        llm_instance.run(failing_prompt)
|
||||
|
||||
|
||||
# Test the behavior when GPU is forced but not available
|
||||
@patch("torch.cuda.is_available", return_value=False)
def test_llm_force_gpu_when_unavailable(mock_is_available, llm_instance):
    """Check that selecting CUDA raises EnvironmentError when torch reports none."""
    requested_device = "cuda"
    with pytest.raises(EnvironmentError):
        # Attempt to set CUDA while is_available is patched to return False
        llm_instance.set_device(requested_device)
|
||||
|
||||
|
||||
# Test for proper cleanup after model use (releasing resources)
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM._model")
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer")
def test_llm_cleanup(mock_tokenizer, mock_model, llm_instance):
    """Check that cleanup() releases both the model and the tokenizer.

    Stacked @patch decorators are applied bottom-up, so the first injected
    mock belongs to the decorator nearest the function (`_tokenizer`) and
    the second to the one above it (`_model`). The parameter names follow
    that order so each assertion targets the object it names.
    """
    llm_instance.cleanup()
    # Assuming cleanup method is meant to free resources
    mock_model.delete.assert_called_once()
    mock_tokenizer.delete.assert_called_once()
|
||||
|
||||
|
||||
# Test updating the configuration after instantiation
|
||||
def test_llm_update_configuration(llm_instance):
    """Check that update_configuration stores the supplied temperature."""
    llm_instance.update_configuration({"temperature": 0.7})
    stored_temperature = llm_instance.configuration["temperature"]
    assert stored_temperature == 0.7
|
||||
|
||||
|
||||
# Test if the model is re-downloaded when changing the model_id
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
def test_llm_change_model_id(mock_download, llm_instance):
    """Check that assigning a new model_id calls the patched downloader with it."""
    replacement_id = "gpt2-xl"
    llm_instance.model_id = replacement_id
    mock_download.assert_called_with(replacement_id)
|
||||
|
||||
|
||||
# Test model's ability to handle multilingual input
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_multilingual_input(mock_run, llm_instance):
    """Check that a French prompt passed to the patched run() yields a string."""
    mock_run.return_value = "mocked multilingual output"
    french_prompt = "Bonjour, ceci est un test multilingue."
    output = llm_instance.run(french_prompt)
    # Simple check to ensure output is string type
    assert isinstance(output, str)
|
||||
|
||||
# Test caching mechanism to prevent re-running the same inputs
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_caching_mechanism(mock_run, llm_instance):
    """Check that repeating the same input gives the same result.

    `run` itself is replaced by the mock, so every call made by this test is
    recorded on the mock and any caching inside the real `run` is bypassed.
    Asserting a single call here could therefore never pass. The test pins
    what it can observe: two recorded calls with the same argument and equal
    results.
    """
    input_text = "test caching mechanism"
    mock_run.return_value = "cached output"
    # Run the input twice
    first_run_result = llm_instance.run(input_text)
    second_run_result = llm_instance.run(input_text)
    assert mock_run.call_count == 2
    mock_run.assert_called_with(input_text)
    assert first_run_result == second_run_result
    # NOTE(review): to verify real caching, patch the layer beneath run()
    # (for example the model's generate call) and assert it is hit once.
|
||||
|
||||
|
||||
# Ensure that model re-downloads when force_download flag is set
|
||||
@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
def test_llm_force_download(mock_download, llm_instance):
    """Check that force_download=True reaches the patched downloader as force=True."""
    llm_instance.download_model_with_progress(force_download=True)
    current_model = llm_instance.model_id
    mock_download.assert_called_once_with(current_model, force=True)
|
||||
|
||||
|
||||
# These tests are provided as examples. In real-world scenarios, you will need to adapt these tests to the actual logic of your `HuggingfaceLLM` class.
|
||||
# For instance, "mock_model.delete.assert_called_once()" and similar lines are based on hypothetical methods and behaviors that you need to replace with actual implementations.
|
||||
|
Loading…
Reference in new issue