pull/91/head
Kye 1 year ago
parent 8dfb1d33d0
commit fea0eeebc9

@@ -294,3 +294,23 @@ class HuggingfaceLLM:
            )
        print(dashboard)

    def set_device(self, device):
        """
        Changes the device used for inference.

        Parameters
        ----------
        device : str
            The new device to use for inference.
        """
        self.device = device
        self.model.to(self.device)

    def set_max_length(self, max_length):
        """Set max_length"""
        self.max_length = max_length

    def clear_chat_history(self):
        """Clear chat history"""
        self.chat_history = []
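
A quick usage sketch of the new helpers (hedged: it assumes the existing `HuggingfaceLLM` constructor and `run()` method shown in the tests below, and that the underlying model is already loaded before `set_device` is called, since `set_device` moves `self.model`; the model id is only illustrative):

from swarms.models.huggingface import HuggingfaceLLM

llm = HuggingfaceLLM(model_id="gpt2")  # illustrative model id
llm.set_max_length(256)      # cap generated sequence length
llm.set_device("cuda")       # moves self.model, so the model must already be loaded
print(llm.run("Once upon a time"))
llm.clear_chat_history()     # drop any accumulated chat history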

@@ -35,7 +35,7 @@ class Petals:
            "max_length": self.max_length,
        }

-    def generate(self, prompt):
+    def __call__(self, prompt):
        """Generate text using the Petals API."""
        params = self._default_params()
        inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
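
Renaming `generate` to `__call__` lets callers invoke the Petals wrapper directly as a function. A minimal sketch of the new call style (the constructor argument below is hypothetical, not taken from the class):

petals = Petals(model_name="bigscience/bloom-petals")  # hypothetical constructor argument
output = petals("Once upon a time")                    # previously: petals.generate(...)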

@@ -0,0 +1,68 @@
# test_embeddings.py
import pytest
import openai
from unittest.mock import patch
from swarms.models.simple_ada import get_ada_embeddings  # Adjust this import path to your project structure
from os import getenv
from dotenv import load_dotenv

load_dotenv()


# Fixture for test texts
@pytest.fixture
def test_texts():
    return [
        "Hello World",
        "This is a test string with newline\ncharacters",
        "A quick brown fox jumps over the lazy dog",
    ]


# Basic Test
def test_get_ada_embeddings_basic(test_texts):
    with patch('openai.Embedding.create') as mock_create:
        # Mocking the OpenAI API call
        mock_create.return_value = {
            "data": [
                {"embedding": [0.1, 0.2, 0.3]}
            ]
        }

        for text in test_texts:
            embedding = get_ada_embeddings(text)
            assert embedding == [0.1, 0.2, 0.3], "Embedding does not match expected output"
            mock_create.assert_called_with(input=[text.replace("\n", " ")], model="text-embedding-ada-002")


# Parameterized Test
@pytest.mark.parametrize(
    "text, model, expected_call_model",
    [
        ("Hello World", "text-embedding-ada-002", "text-embedding-ada-002"),
        ("Hello World", "text-embedding-ada-001", "text-embedding-ada-001"),
    ],
)
def test_get_ada_embeddings_models(text, model, expected_call_model):
    with patch('openai.Embedding.create') as mock_create:
        mock_create.return_value = {
            "data": [
                {"embedding": [0.1, 0.2, 0.3]}
            ]
        }
        _ = get_ada_embeddings(text, model=model)
        mock_create.assert_called_with(input=[text], model=expected_call_model)


# Exception Test
def test_get_ada_embeddings_exception():
    with patch('openai.Embedding.create') as mock_create:
        mock_create.side_effect = openai.error.OpenAIError("Test error")
        with pytest.raises(openai.error.OpenAIError):
            get_ada_embeddings("Some text")


# Tests for environment variable loading
def test_env_var_loading(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "testkey123")
    with patch('openai.Embedding.create'):
        assert getenv("OPENAI_API_KEY") == "testkey123", "Environment variable for API key is not set correctly"


# ... more tests to cover other aspects such as different input types, large inputs, invalid inputs, etc.
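
For reference, an implementation consistent with what these tests assert could look like the sketch below. The real `swarms.models.simple_ada.get_ada_embeddings` may differ; this only mirrors the mocked `openai.Embedding.create` call, the expected newline stripping, and the way the tests read the first embedding from the response:

import openai

def get_ada_embeddings(text: str, model: str = "text-embedding-ada-002"):
    # The tests expect newlines to be replaced with spaces before the API call
    text = text.replace("\n", " ")
    response = openai.Embedding.create(input=[text], model=model)
    # The tests compare against the first embedding in the "data" field
    return response["data"][0]["embedding"]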

@@ -1,58 +1,238 @@
-import pytest
-import torch
-from unittest.mock import Mock, patch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from swarms.models.huggingface import HuggingfaceLLM
-
-
-@pytest.fixture
-def huggingface_llm():
-    # Create an instance of HuggingfaceLLM for testing.
-    model_id = "gpt2-small"
-    return HuggingfaceLLM(model_id=model_id)
-
-
-def test_initialization(huggingface_llm):
-    # Test the initialization of the HuggingfaceLLM class.
-    assert huggingface_llm.model_id == "gpt2-small"
-    assert huggingface_llm.device in ["cpu", "cuda"]
-    assert huggingface_llm.max_length == 20
-    assert huggingface_llm.verbose == False
-    assert huggingface_llm.distributed == False
-    assert huggingface_llm.decoding == False
-    assert huggingface_llm.model is None
-    assert huggingface_llm.tokenizer is None
-
-
-def test_load_model(huggingface_llm):
-    # Test loading the model.
-    huggingface_llm.load_model()
-    assert isinstance(huggingface_llm.model, AutoModelForCausalLM)
-    assert isinstance(huggingface_llm.tokenizer, AutoTokenizer)
-
-
-def test_run(huggingface_llm):
-    # Test the run method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm.run(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_call_method(huggingface_llm):
-    # Test the __call__ method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_load_model_failure():
-    # Test loading model failure.
-    with patch(
-        "your_module.AutoModelForCausalLM.from_pretrained",
-        side_effect=Exception("Model load failed"),
-    ):
-        with pytest.raises(Exception):
-            huggingface_llm = HuggingfaceLLM(model_id="gpt2-small")
-            huggingface_llm.load_model()
+import torch
+import pytest
+from unittest.mock import patch, MagicMock
+from swarms.models.huggingface import HuggingfaceLLM  # Replace with the actual import path
+
+
+# Fixture for the class instance
+@pytest.fixture
+def llm_instance():
+    model_id = "gpt2-small"
+    instance = HuggingfaceLLM(model_id=model_id)
+    return instance
+
+
+# Test for instantiation and attributes
+def test_llm_initialization(llm_instance):
+    assert llm_instance.model_id == "gpt2-small"
+    assert llm_instance.max_length == 500
+    # ... add more assertions for all default attributes
+
+
+# Parameterized test for setting devices
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_llm_set_device(llm_instance, device):
+    llm_instance.set_device(device)
+    assert llm_instance.device == device
+
+
+# Test exception during initialization with a bad model_id
+def test_llm_bad_model_initialization():
+    with pytest.raises(Exception):
+        HuggingfaceLLM(model_id="unknown-model")
+
+
+# Mocking the tokenizer and model to test run method
+@patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
+@patch("swarms.models.huggingface.AutoModelForCausalLM.from_pretrained")
+def test_llm_run(mock_model, mock_tokenizer, llm_instance):
+    mock_model.return_value.generate.return_value = "mocked output"
+    mock_tokenizer.return_value.encode.return_value = "mocked input"
+    result = llm_instance.run("test task")
+    assert result == "mocked output"
+
+
+# Async test (requires pytest-asyncio plugin)
+@pytest.mark.asyncio
+async def test_llm_run_async(llm_instance):
+    result = await llm_instance.run_async("test task")
+    assert isinstance(result, str)
+
+
+# Test for checking GPU availability
+def test_llm_gpu_availability(llm_instance):
+    # Assuming the test is running on a machine where the GPU availability is known
+    expected_result = torch.cuda.is_available()
+    assert llm_instance.gpu_available() == expected_result
+
+
+# Test for memory consumption reporting
+def test_llm_memory_consumption(llm_instance):
+    # Mocking torch.cuda functions for consistent results
+    with patch("torch.cuda.memory_allocated", return_value=1024):
+        with patch("torch.cuda.memory_reserved", return_value=2048):
+            memory = llm_instance.memory_consumption()
+            assert memory == {"allocated": 1024, "reserved": 2048}
+
+
+# Test different initialization parameters
+@pytest.mark.parametrize("model_id, max_length", [
+    ("gpt2-small", 100),
+    ("gpt2-medium", 200),
+    ("gpt2-large", None)  # None to check default behavior
+])
+def test_llm_initialization_params(model_id, max_length):
+    if max_length:
+        instance = HuggingfaceLLM(model_id=model_id, max_length=max_length)
+        assert instance.max_length == max_length
+    else:
+        instance = HuggingfaceLLM(model_id=model_id)
+        assert instance.max_length == 500  # Assuming 500 is the default max_length
+
+
+# Test for setting an invalid device
+def test_llm_set_invalid_device(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.set_device("quantum_processor")
+
+
+# Test for model download progress bar
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_model_download_progress(mock_download, llm_instance):
+    llm_instance.download_model_with_progress()
+    mock_download.assert_called_once()
+
+
+# Mocking external API call to test run method without network
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_without_network(mock_run, llm_instance):
+    mock_run.return_value = "mocked output"
+    result = llm_instance.run("test task without network")
+    assert result == "mocked output"
+
+
+# Test handling of empty input for the run method
+def test_llm_run_empty_input(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.run("")
+
+
+# Test the generation with a provided seed for reproducibility
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_with_seed(mock_run, llm_instance):
+    seed = 42
+    llm_instance.set_seed(seed)
+    # Assuming set_seed method affects the randomness in the model
+    # You would typically ensure that setting the seed gives reproducible results
+    mock_run.return_value = "mocked deterministic output"
+    result = llm_instance.run("test task", seed=seed)
+    assert result == "mocked deterministic output"
+
+
+# Test the output length is as expected
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_output_length(mock_run, llm_instance):
+    input_text = "test task"
+    llm_instance.max_length = 50  # set a max_length for the output
+    mock_run.return_value = "mocked output" * 10  # some long text
+    result = llm_instance.run(input_text)
+    assert len(result.split()) <= llm_instance.max_length
+
+
+# Test the tokenizer handling special tokens correctly
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.encode")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.decode")
+def test_llm_tokenizer_special_tokens(mock_decode, mock_encode, llm_instance):
+    mock_encode.return_value = "encoded input with special tokens"
+    mock_decode.return_value = "decoded output with special tokens"
+    result = llm_instance.run("test task with special tokens")
+    mock_encode.assert_called_once()
+    mock_decode.assert_called_once()
+    assert "special tokens" in result
+
+
+# Test for correct handling of timeouts
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_timeout_handling(mock_run, llm_instance):
+    mock_run.side_effect = TimeoutError
+    with pytest.raises(TimeoutError):
+        llm_instance.run("test task with timeout")
+
+
+# Test for response time within a threshold (performance test)
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_response_time(mock_run, llm_instance):
+    import time
+
+    mock_run.return_value = "mocked output"
+    start_time = time.time()
+    llm_instance.run("test task for response time")
+    end_time = time.time()
+    assert end_time - start_time < 1  # Assuming the response should be faster than 1 second
+
+
+# Test the logging of a warning for long inputs
+@patch("swarms.models.huggingface.logging.warning")
+def test_llm_long_input_warning(mock_warning, llm_instance):
+    long_input = "x" * 10000  # input longer than the typical limit
+    llm_instance.run(long_input)
+    mock_warning.assert_called_once()
+
+
+# Test for run method behavior when model raises an exception
+@patch("swarms.models.huggingface.HuggingfaceLLM._model.generate", side_effect=RuntimeError)
+def test_llm_run_model_exception(mock_generate, llm_instance):
+    with pytest.raises(RuntimeError):
+        llm_instance.run("test task when model fails")
+
+
+# Test the behavior when GPU is forced but not available
+@patch("torch.cuda.is_available", return_value=False)
+def test_llm_force_gpu_when_unavailable(mock_is_available, llm_instance):
+    with pytest.raises(EnvironmentError):
+        llm_instance.set_device("cuda")  # Attempt to set CUDA when it's not available
+
+
+# Test for proper cleanup after model use (releasing resources)
+@patch("swarms.models.huggingface.HuggingfaceLLM._model")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer")
+def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance):
+    llm_instance.cleanup()
+    # Assuming cleanup method is meant to free resources
+    mock_model.delete.assert_called_once()
+    mock_tokenizer.delete.assert_called_once()
+
+
+# Test updating the configuration after instantiation
+def test_llm_update_configuration(llm_instance):
+    new_config = {"temperature": 0.7}
+    llm_instance.update_configuration(new_config)
+    assert llm_instance.configuration["temperature"] == 0.7
+
+
+# Test if the model is re-downloaded when changing the model_id
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_change_model_id(mock_download, llm_instance):
+    new_model_id = "gpt2-xl"
+    llm_instance.model_id = new_model_id
+    mock_download.assert_called_with(new_model_id)
+
+
+# Test model's ability to handle multilingual input
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_multilingual_input(mock_run, llm_instance):
+    mock_run.return_value = "mocked multilingual output"
+    multilingual_input = "Bonjour, ceci est un test multilingue."
+    result = llm_instance.run(multilingual_input)
+    assert isinstance(result, str)  # Simple check to ensure output is string type
+
+
+# Test caching mechanism to prevent re-running the same inputs
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_caching_mechanism(mock_run, llm_instance):
+    input_text = "test caching mechanism"
+    mock_run.return_value = "cached output"
+    # Run the input twice
+    first_run_result = llm_instance.run(input_text)
+    second_run_result = llm_instance.run(input_text)
+    mock_run.assert_called_once()  # Should only be called once due to caching
+    assert first_run_result == second_run_result
+
+
+# Ensure that model re-downloads when force_download flag is set
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_force_download(mock_download, llm_instance):
+    llm_instance.download_model_with_progress(force_download=True)
+    mock_download.assert_called_once_with(llm_instance.model_id, force=True)
+
+
+# These tests are provided as examples. In real-world scenarios, you will need to adapt these tests to the actual logic of your `HuggingfaceLLM` class.
+# For instance, "mock_model.delete.assert_called_once()" and similar lines are based on hypothetical methods and behaviors that you need to replace with actual implementations.
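
As the closing note above says, several of these tests target methods that `HuggingfaceLLM` may not implement yet. As one example, a `memory_consumption` helper consistent with the mocked test above could be sketched as follows (this method is assumed, not confirmed, to exist on the class):

import torch

def memory_consumption(self):
    # Hypothetical HuggingfaceLLM method: report GPU memory usage, matching the
    # patched torch.cuda.memory_allocated / memory_reserved values in the test above.
    return {
        "allocated": torch.cuda.memory_allocated(),
        "reserved": torch.cuda.memory_reserved(),
    }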
