# swarms/tests/models/test_hf.py
import logging
from unittest.mock import MagicMock, patch

import pytest
import torch

from swarms.models.huggingface import (
    HuggingfaceLLM,  # Replace with the actual import path
)
# Fixture for the class instance
@pytest.fixture
def llm_instance():
    model_id = "NousResearch/Nous-Hermes-2-Vision-Alpha"
    instance = HuggingfaceLLM(model_id=model_id)
    return instance
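

# The fixture above instantiates the real model, which downloads weights and
# can be slow. A lighter-weight alternative fixture is sketched below; it
# assumes run() reads self.model and self.tokenizer, so adjust the attribute
# names to the real HuggingfaceLLM internals.
@pytest.fixture
def mocked_llm_instance():
    instance = HuggingfaceLLM.__new__(HuggingfaceLLM)  # skip __init__
    instance.model_id = "NousResearch/Nous-Hermes-2-Vision-Alpha"
    instance.max_length = 500
    instance.model = MagicMock()
    instance.tokenizer = MagicMock()
    return instance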


# Test for instantiation and attributes
def test_llm_initialization(llm_instance):
    assert (
        llm_instance.model_id == "NousResearch/Nous-Hermes-2-Vision-Alpha"
    )
    assert llm_instance.max_length == 500
    # ... add more assertions for all default attributes


# Parameterized test for setting devices
@pytest.mark.parametrize("device", ["cpu", "cuda"])
def test_llm_set_device(llm_instance, device):
    llm_instance.set_device(device)
    assert llm_instance.device == device


# Test exception during initialization with a bad model_id
def test_llm_bad_model_initialization():
    with pytest.raises(Exception):
        HuggingfaceLLM(model_id="unknown-model")


# Mocking the tokenizer and model to test the run method. This test is left
# disabled: patching from_pretrained here has no effect because the
# llm_instance fixture has already instantiated the real tokenizer and model
# by the time the patches apply. See the runnable alternative below.
# @patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
# @patch(
#     "swarms.models.huggingface.AutoModelForCausalLM.from_pretrained"
# )
# def test_llm_run(mock_model, mock_tokenizer, llm_instance):
#     mock_model.return_value.generate.return_value = "mocked output"
#     mock_tokenizer.return_value.encode.return_value = "mocked input"
#     result = llm_instance.run("test task")
#     assert result == "mocked output"
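

# A runnable alternative sketch: mock the components on the instance itself
# rather than patching from_pretrained, which only affects objects created
# after the patch is active. It assumes run() ultimately returns the string
# produced by self.tokenizer.decode; adjust to the real implementation.
def test_llm_run_mocked_components(llm_instance):
    llm_instance.model = MagicMock()
    llm_instance.tokenizer = MagicMock()
    llm_instance.tokenizer.decode.return_value = "mocked output"
    result = llm_instance.run("test task")
    assert result == "mocked output"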


# Async test (requires the pytest-asyncio plugin)
@pytest.mark.asyncio
async def test_llm_run_async(llm_instance):
    result = await llm_instance.run_async("test task")
    assert isinstance(result, str)
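

# If run_async does not exist yet, a minimal implementation sketch (an
# assumption, not the library's actual method) is to offload the blocking
# run() call to a worker thread; Python 3.9+ provides asyncio.to_thread.
async def run_async_sketch(llm: HuggingfaceLLM, task: str) -> str:
    import asyncio

    return await asyncio.to_thread(llm.run, task)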


# Test for checking GPU availability
def test_llm_gpu_availability(llm_instance):
    # Assuming the test runs on a machine where GPU availability is known
    expected_result = torch.cuda.is_available()
    assert llm_instance.gpu_available() == expected_result


# Test for memory consumption reporting
def test_llm_memory_consumption(llm_instance):
    # Mock torch.cuda functions for consistent results
    with patch("torch.cuda.memory_allocated", return_value=1024):
        with patch("torch.cuda.memory_reserved", return_value=2048):
            memory = llm_instance.memory_consumption()
            assert memory == {"allocated": 1024, "reserved": 2048}


# Test different initialization parameters
@pytest.mark.parametrize(
    "model_id, max_length",
    [
        ("NousResearch/Nous-Hermes-2-Vision-Alpha", 100),
        ("microsoft/Orca-2-13b", 200),
        (
            "berkeley-nest/Starling-LM-7B-alpha",
            None,
        ),  # None to check default behavior
    ],
)
def test_llm_initialization_params(model_id, max_length):
    if max_length:
        instance = HuggingfaceLLM(model_id=model_id, max_length=max_length)
        assert instance.max_length == max_length
    else:
        instance = HuggingfaceLLM(model_id=model_id)
        assert instance.max_length == 500  # assuming 500 is the default


# Test for setting an invalid device
def test_llm_set_invalid_device(llm_instance):
    with pytest.raises(ValueError):
        llm_instance.set_device("quantum_processor")


# Mocking the run method to test it without network access
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_without_network(mock_run, llm_instance):
    mock_run.return_value = "mocked output"
    result = llm_instance.run("test task without network")
    assert result == "mocked output"


# Test handling of empty input for the run method
def test_llm_run_empty_input(llm_instance):
    with pytest.raises(ValueError):
        llm_instance.run("")


# Test generation with a provided seed for reproducibility
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_with_seed(mock_run, llm_instance):
    seed = 42
    llm_instance.set_seed(seed)
    # Assuming the set_seed method affects the model's randomness, setting
    # the seed should yield reproducible results.
    mock_run.return_value = "mocked deterministic output"
    result = llm_instance.run("test task", seed=seed)
    assert result == "mocked deterministic output"


# Test that the output length is as expected
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_output_length(mock_run, llm_instance):
    input_text = "test task"
    llm_instance.max_length = 50  # set a max_length for the output
    mock_run.return_value = "mocked output " * 10  # some long, multi-word text
    result = llm_instance.run(input_text)
    assert len(result.split()) <= llm_instance.max_length


# Test the tokenizer handling special tokens correctly
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.encode")
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.decode")
def test_llm_tokenizer_special_tokens(
    mock_decode, mock_encode, llm_instance
):
    mock_encode.return_value = "encoded input with special tokens"
    mock_decode.return_value = "decoded output with special tokens"
    result = llm_instance.run("test task with special tokens")
    mock_encode.assert_called_once()
    mock_decode.assert_called_once()
    assert "special tokens" in result


# Test for correct handling of timeouts
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_timeout_handling(mock_run, llm_instance):
    mock_run.side_effect = TimeoutError
    with pytest.raises(TimeoutError):
        llm_instance.run("test task with timeout")


# Test that the response time stays within a threshold (performance test)
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_response_time(mock_run, llm_instance):
    import time

    mock_run.return_value = "mocked output"
    start_time = time.time()
    llm_instance.run("test task for response time")
    end_time = time.time()
    assert end_time - start_time < 1  # assuming a response takes under a second


# Test the logging of a warning for long inputs
@patch("swarms.models.huggingface.logging.warning")
def test_llm_long_input_warning(mock_warning, llm_instance):
    long_input = "x" * 10000  # input longer than the typical limit
    llm_instance.run(long_input)
    mock_warning.assert_called_once()


# Test run method behavior when the model raises an exception
@patch(
    "swarms.models.huggingface.HuggingfaceLLM._model.generate",
    side_effect=RuntimeError,
)
def test_llm_run_model_exception(mock_generate, llm_instance):
    with pytest.raises(RuntimeError):
        llm_instance.run("test task when model fails")


# Test the behavior when a GPU is forced but not available
@patch("torch.cuda.is_available", return_value=False)
def test_llm_force_gpu_when_unavailable(mock_is_available, llm_instance):
    with pytest.raises(EnvironmentError):
        llm_instance.set_device("cuda")  # attempt to set CUDA when unavailable


# Test for proper cleanup after model use (releasing resources)
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer")
@patch("swarms.models.huggingface.HuggingfaceLLM._model")
def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance):
    llm_instance.cleanup()
    # Assuming the cleanup method is meant to free resources
    mock_model.delete.assert_called_once()
    mock_tokenizer.delete.assert_called_once()


# Test the model's ability to handle multilingual input
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_multilingual_input(mock_run, llm_instance):
    mock_run.return_value = "mocked multilingual output"
    multilingual_input = "Bonjour, ceci est un test multilingue."
    result = llm_instance.run(multilingual_input)
    assert isinstance(result, str)  # simple check that the output is a string


# Test the caching mechanism to prevent re-running the same inputs
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_caching_mechanism(mock_run, llm_instance):
    input_text = "test caching mechanism"
    mock_run.return_value = "cached output"
    # Run the same input twice
    first_run_result = llm_instance.run(input_text)
    second_run_result = llm_instance.run(input_text)
    mock_run.assert_called_once()  # should only be called once due to caching
    assert first_run_result == second_run_result

# These tests are provided as examples. In real-world scenarios you will need
# to adapt them to the actual logic of your `HuggingfaceLLM` class. For
# instance, `mock_model.delete.assert_called_once()` and similar lines rely on
# hypothetical methods and behaviors that you must replace with the actual
# implementations.
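

# A minimal sketch of implementations that would satisfy the hypothetical
# tests above. Everything here (cleanup, memory_consumption, the delete()
# calls) is an illustrative assumption, not the library's actual API.
class HuggingfaceLLMSketch(HuggingfaceLLM):
    def cleanup(self):
        # Hypothetical: release the model and tokenizer handles.
        self._model.delete()
        self._tokenizer.delete()

    def memory_consumption(self):
        # Report CUDA memory usage in bytes for the current device.
        return {
            "allocated": torch.cuda.memory_allocated(),
            "reserved": torch.cuda.memory_reserved(),
        }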


# Mock some functions and objects for testing
@pytest.fixture
def mock_huggingface_llm(monkeypatch):
    # Mock __init__ so no real model or tokenizer is created; simply record
    # the configuration on the instance so the attribute assertions below hold.
    def mock_init(
        self,
        model_id,
        device="cpu",
        max_length=500,
        quantize=False,
        quantization_config=None,
        verbose=False,
        distributed=False,
        decoding=False,
        max_workers=5,
        repitition_penalty=1.3,
        no_repeat_ngram_size=5,
        temperature=0.7,
        top_k=40,
        top_p=0.8,
    ):
        self.model_id = model_id
        self.device = device
        self.max_length = max_length
        self.quantize = quantize
        self.quantization_config = quantization_config
        self.verbose = verbose
        self.distributed = distributed
        self.decoding = decoding
        self.max_workers = max_workers
        self.repitition_penalty = repitition_penalty
        self.no_repeat_ngram_size = no_repeat_ngram_size
        self.temperature = temperature
        self.top_k = top_k
        self.top_p = top_p

    # Mock the model loading
    def mock_load_model(self):
        pass

    # Mock the model generation
    def mock_run(self, task):
        pass

    monkeypatch.setattr(HuggingfaceLLM, "__init__", mock_init)
    monkeypatch.setattr(HuggingfaceLLM, "load_model", mock_load_model)
    monkeypatch.setattr(HuggingfaceLLM, "run", mock_run)


# Basic tests for initialization and attribute settings
def test_init_huggingface_llm(mock_huggingface_llm):
    llm = HuggingfaceLLM(
        model_id="test_model",
        device="cuda",
        max_length=1000,
        quantize=True,
        quantization_config={"config_key": "config_value"},
        verbose=True,
        distributed=True,
        decoding=True,
        max_workers=3,
        repitition_penalty=1.5,
        no_repeat_ngram_size=4,
        temperature=0.8,
        top_k=50,
        top_p=0.7,
    )
    assert llm.model_id == "test_model"
    assert llm.device == "cuda"
    assert llm.max_length == 1000
    assert llm.quantize is True
    assert llm.quantization_config == {"config_key": "config_value"}
    assert llm.verbose is True
    assert llm.distributed is True
    assert llm.decoding is True
    assert llm.max_workers == 3
    assert llm.repitition_penalty == 1.5
    assert llm.no_repeat_ngram_size == 4
    assert llm.temperature == 0.8
    assert llm.top_k == 50
    assert llm.top_p == 0.7


# Test loading the model
def test_load_model(mock_huggingface_llm):
    llm = HuggingfaceLLM(model_id="test_model")
    llm.load_model()


# Test running the model
def test_run(mock_huggingface_llm):
    llm = HuggingfaceLLM(model_id="test_model")
    llm.run("Test prompt")


# Test for setting max_length
def test_llm_set_max_length(llm_instance):
    new_max_length = 1000
    llm_instance.set_max_length(new_max_length)
    assert llm_instance.max_length == new_max_length


# Test for setting verbose
def test_llm_set_verbose(llm_instance):
    llm_instance.set_verbose(True)
    assert llm_instance.verbose is True


# Test for setting distributed
def test_llm_set_distributed(llm_instance):
    llm_instance.set_distributed(True)
    assert llm_instance.distributed is True


# Test for setting decoding
def test_llm_set_decoding(llm_instance):
    llm_instance.set_decoding(True)
    assert llm_instance.decoding is True


# Test for setting max_workers
def test_llm_set_max_workers(llm_instance):
    new_max_workers = 10
    llm_instance.set_max_workers(new_max_workers)
    assert llm_instance.max_workers == new_max_workers


# Test for setting repitition_penalty
def test_llm_set_repitition_penalty(llm_instance):
    new_repitition_penalty = 1.5
    llm_instance.set_repitition_penalty(new_repitition_penalty)
    assert llm_instance.repitition_penalty == new_repitition_penalty


# Test for setting no_repeat_ngram_size
def test_llm_set_no_repeat_ngram_size(llm_instance):
    new_no_repeat_ngram_size = 6
    llm_instance.set_no_repeat_ngram_size(new_no_repeat_ngram_size)
    assert llm_instance.no_repeat_ngram_size == new_no_repeat_ngram_size


# Test for setting temperature
def test_llm_set_temperature(llm_instance):
    new_temperature = 0.8
    llm_instance.set_temperature(new_temperature)
    assert llm_instance.temperature == new_temperature


# Test for setting top_k
def test_llm_set_top_k(llm_instance):
    new_top_k = 50
    llm_instance.set_top_k(new_top_k)
    assert llm_instance.top_k == new_top_k


# Test for setting top_p
def test_llm_set_top_p(llm_instance):
    new_top_p = 0.9
    llm_instance.set_top_p(new_top_p)
    assert llm_instance.top_p == new_top_p


# Test for setting quantize
def test_llm_set_quantize(llm_instance):
    llm_instance.set_quantize(True)
    assert llm_instance.quantize is True


# Test for setting quantization_config
def test_llm_set_quantization_config(llm_instance):
    new_quantization_config = {
        "load_in_4bit": False,
        "bnb_4bit_use_double_quant": False,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16,
    }
    llm_instance.set_quantization_config(new_quantization_config)
    assert llm_instance.quantization_config == new_quantization_config
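

# For reference, the keys above mirror the kwargs of
# transformers.BitsAndBytesConfig; if HuggingfaceLLM accepts a config object
# instead of a dict (an assumption), the equivalent would be:
def example_bnb_config():
    from transformers import BitsAndBytesConfig

    return BitsAndBytesConfig(
        load_in_4bit=False,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )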


# Test for setting model_id
def test_llm_set_model_id(llm_instance):
    new_model_id = "EleutherAI/gpt-neo-2.7B"
    llm_instance.set_model_id(new_model_id)
    assert llm_instance.model_id == new_model_id


# Test for setting the model
@patch("swarms.models.huggingface.AutoModelForCausalLM.from_pretrained")
def test_llm_set_model(mock_model, llm_instance):
    mock_model.return_value = "mocked model"
    llm_instance.set_model(mock_model)
    assert llm_instance.model == "mocked model"


# Test for setting the tokenizer
@patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
def test_llm_set_tokenizer(mock_tokenizer, llm_instance):
    mock_tokenizer.return_value = "mocked tokenizer"
    llm_instance.set_tokenizer(mock_tokenizer)
    assert llm_instance.tokenizer == "mocked tokenizer"


# Test for setting the logger
def test_llm_set_logger(llm_instance):
    new_logger = logging.getLogger("test_logger")
    llm_instance.set_logger(new_logger)
    assert llm_instance.logger == new_logger


# Test for saving the model
@patch("torch.save")
def test_llm_save_model(mock_save, llm_instance):
    llm_instance.save_model("path/to/save")
    mock_save.assert_called_once()


# Test for print_dashboard
@patch("builtins.print")
def test_llm_print_dashboard(mock_print, llm_instance):
    llm_instance.print_dashboard("test task")
    mock_print.assert_called()


# Test for the __call__ method
@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_call(mock_run, llm_instance):
    mock_run.return_value = "mocked output"
    result = llm_instance("test task")
    assert result == "mocked output"