"""Tests for ``swarms.models.huggingface.HuggingfaceLLM``."""

import time
from unittest.mock import MagicMock, patch

import pytest
import torch

from swarms.models.huggingface import (
    HuggingfaceLLM,  # Replace with the actual import path
)


@pytest.fixture
def llm_instance():
    """Return a ``HuggingfaceLLM`` built with the ``gpt2-small`` model id."""
    model_id = "gpt2-small"
    return HuggingfaceLLM(model_id=model_id)


def test_llm_initialization(llm_instance):
    """Check the attributes set at instantiation."""
    assert llm_instance.model_id == "gpt2-small"
    assert llm_instance.max_length == 500
    # ... add more assertions for all default attributes


@pytest.mark.parametrize(
    "device",
    [
        "cpu",
        # test_llm_force_gpu_when_unavailable expects set_device("cuda") to
        # raise when CUDA is missing, so this case only makes sense on a
        # machine that actually has CUDA.
        pytest.param(
            "cuda",
            marks=pytest.mark.skipif(
                not torch.cuda.is_available(),
                reason="CUDA is not available",
            ),
        ),
    ],
)
def test_llm_set_device(llm_instance, device):
    """Set each supported device and check it is stored on the instance."""
    llm_instance.set_device(device)
    assert llm_instance.device == device


def test_llm_bad_model_initialization():
    """Expect an exception when instantiating with an unknown model id."""
    # NOTE(review): ``Exception`` is very broad; narrow it once the concrete
    # error type raised by HuggingfaceLLM is confirmed.
    with pytest.raises(Exception):
        HuggingfaceLLM(model_id="unknown-model")


@patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
@patch("swarms.models.huggingface.AutoModelForCausalLM.from_pretrained")
def test_llm_run(mock_model, mock_tokenizer, llm_instance):
    """Mock the tokenizer and model loaders and check the result of ``run``.

    ``patch`` decorators inject bottom-up, so ``mock_model`` is the
    ``AutoModelForCausalLM`` patch and ``mock_tokenizer`` the
    ``AutoTokenizer`` patch.
    """
    # NOTE(review): the ``llm_instance`` fixture is created before these
    # patches become active. If HuggingfaceLLM loads its model in
    # ``__init__``, the mocks below are never used -- confirm.
    mock_model.return_value.generate.return_value = "mocked output"
    mock_tokenizer.return_value.encode.return_value = "mocked input"
    result = llm_instance.run("test task")
    assert result == "mocked output"


# Async test (requires pytest-asyncio plugin)
@pytest.mark.asyncio
async def test_llm_run_async(llm_instance):
    """Await ``run_async`` and check that it yields a string."""
    result = await llm_instance.run_async("test task")
    assert isinstance(result, str)


def test_llm_gpu_availability(llm_instance):
    """Compare ``gpu_available`` with what torch reports on this machine."""
    expected_result = torch.cuda.is_available()
    assert llm_instance.gpu_available() == expected_result


def test_llm_memory_consumption(llm_instance):
    """Check the memory report against mocked ``torch.cuda`` counters."""
    # Mocking torch.cuda functions for consistent results
    with patch("torch.cuda.memory_allocated", return_value=1024), patch(
        "torch.cuda.memory_reserved", return_value=2048
    ):
        memory = llm_instance.memory_consumption()
    assert memory == {"allocated": 1024, "reserved": 2048}


@pytest.mark.parametrize(
    "model_id, max_length",
    [
        ("gpt2-small", 100),
        ("gpt2-medium", 200),
        ("gpt2-large", None),  # None to check default behavior
    ],
)
def test_llm_initialization_params(model_id, max_length):
    """Instantiate with and without ``max_length`` and check the stored value."""
    # ``None`` is the "use the default" sentinel; compare by identity so the
    # branch does not depend on the truthiness of the value.
    if max_length is not None:
        instance = HuggingfaceLLM(model_id=model_id, max_length=max_length)
        assert instance.max_length == max_length
    else:
        instance = HuggingfaceLLM(model_id=model_id)
        assert instance.max_length == 500  # Assuming 500 is the default max_length


def test_llm_set_invalid_device(llm_instance):
    """Expect ``ValueError`` when an unknown device name is set."""
    with pytest.raises(ValueError):
        llm_instance.set_device("quantum_processor")


@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
def test_llm_model_download_progress(mock_download, llm_instance):
    """Check that ``download_model_with_progress`` calls ``_download_model`` once."""
    llm_instance.download_model_with_progress()
    mock_download.assert_called_once()


@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_without_network(mock_run, llm_instance):
    """Call a mocked ``run`` so no network access is needed."""
    mock_run.return_value = "mocked output"
    result = llm_instance.run("test task without network")
    assert result == "mocked output"


def test_llm_run_empty_input(llm_instance):
    """Expect ``ValueError`` when ``run`` is given an empty string."""
    with pytest.raises(ValueError):
        llm_instance.run("")


@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_with_seed(mock_run, llm_instance):
    """Set a seed, then call a mocked ``run`` with that seed."""
    seed = 42
    llm_instance.set_seed(seed)
    # Assuming set_seed method affects the randomness in the model
    # You would typically ensure that setting the seed gives reproducible results
    mock_run.return_value = "mocked deterministic output"
    result = llm_instance.run("test task", seed=seed)
    assert result == "mocked deterministic output"


@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_run_output_length(mock_run, llm_instance):
    """Check the word count of the (mocked) output against ``max_length``."""
    input_text = "test task"
    llm_instance.max_length = 50  # set a max_length for the output
    mock_run.return_value = "mocked output" * 10  # some long text
    result = llm_instance.run(input_text)
    assert len(result.split()) <= llm_instance.max_length


@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.encode")
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.decode")
def test_llm_tokenizer_special_tokens(mock_decode, mock_encode, llm_instance):
    """Check that ``run`` encodes and decodes once each via the tokenizer.

    ``patch`` decorators inject bottom-up, so ``mock_decode`` comes first.
    """
    mock_encode.return_value = "encoded input with special tokens"
    mock_decode.return_value = "decoded output with special tokens"
    result = llm_instance.run("test task with special tokens")
    mock_encode.assert_called_once()
    mock_decode.assert_called_once()
    assert "special tokens" in result


@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_timeout_handling(mock_run, llm_instance):
    """Check that a ``TimeoutError`` raised by ``run`` reaches the caller."""
    mock_run.side_effect = TimeoutError
    with pytest.raises(TimeoutError):
        llm_instance.run("test task with timeout")


@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_response_time(mock_run, llm_instance):
    """Check that a mocked ``run`` call completes within the time threshold."""
    mock_run.return_value = "mocked output"
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    llm_instance.run("test task for response time")
    elapsed = time.perf_counter() - start_time
    assert elapsed < 1  # Assuming the response should be faster than 1 second


@patch("swarms.models.huggingface.logging.warning")
def test_llm_long_input_warning(mock_warning, llm_instance):
    """Check that a very long input triggers exactly one logged warning."""
    long_input = "x" * 10000  # input longer than the typical limit
    llm_instance.run(long_input)
    mock_warning.assert_called_once()
@patch(
    "swarms.models.huggingface.HuggingfaceLLM._model.generate",
    side_effect=RuntimeError,
)
def test_llm_run_model_exception(mock_generate, llm_instance):
    """Check that a ``RuntimeError`` from the model's ``generate`` escapes ``run``."""
    with pytest.raises(RuntimeError):
        llm_instance.run("test task when model fails")


@patch("torch.cuda.is_available", return_value=False)
def test_llm_force_gpu_when_unavailable(mock_is_available, llm_instance):
    """Expect an error when CUDA is requested but reported as unavailable."""
    # ``EnvironmentError`` is an alias of ``OSError`` in Python 3, so this
    # matches exactly the same exceptions as before.
    with pytest.raises(OSError):
        # Attempt to set CUDA when it's not available
        llm_instance.set_device("cuda")


@patch("swarms.models.huggingface.HuggingfaceLLM._model")
@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer")
def test_llm_cleanup(mock_tokenizer, mock_model, llm_instance):
    """Check that ``cleanup`` calls ``delete`` on both model and tokenizer.

    ``patch`` decorators inject bottom-up: the ``_tokenizer`` patch (closest
    to the function) is the first argument, the ``_model`` patch the second.
    """
    llm_instance.cleanup()
    # Assuming cleanup method is meant to free resources
    mock_model.delete.assert_called_once()
    mock_tokenizer.delete.assert_called_once()


def test_llm_update_configuration(llm_instance):
    """Update the configuration after instantiation and read the value back."""
    new_config = {"temperature": 0.7}
    llm_instance.update_configuration(new_config)
    assert llm_instance.configuration["temperature"] == 0.7


@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
def test_llm_change_model_id(mock_download, llm_instance):
    """Check that assigning a new ``model_id`` triggers a download of it."""
    new_model_id = "gpt2-xl"
    llm_instance.model_id = new_model_id
    mock_download.assert_called_with(new_model_id)


@patch("swarms.models.huggingface.HuggingfaceLLM.run")
def test_llm_multilingual_input(mock_run, llm_instance):
    """Pass non-English input to a mocked ``run`` and check the result type."""
    mock_run.return_value = "mocked multilingual output"
    multilingual_input = "Bonjour, ceci est un test multilingue."
    result = llm_instance.run(multilingual_input)
    assert isinstance(result, str)  # Simple check to ensure output is string type


@patch("swarms.models.huggingface.HuggingfaceLLM._model.generate")
def test_llm_caching_mechanism(mock_generate, llm_instance):
    """Run the same input twice and expect a single underlying generation."""
    # Patching ``run`` itself would record both calls below, so an
    # ``assert_called_once`` on it could never pass. The layer beneath
    # ``run`` is patched instead (same target as
    # test_llm_run_model_exception).
    # NOTE(review): assumes ``run`` delegates to ``_model.generate`` and
    # returns its output -- confirm against the HuggingfaceLLM class.
    input_text = "test caching mechanism"
    mock_generate.return_value = "cached output"
    # Run the input twice
    first_run_result = llm_instance.run(input_text)
    second_run_result = llm_instance.run(input_text)
    mock_generate.assert_called_once()  # Should only be called once due to caching
    assert first_run_result == second_run_result


@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
def test_llm_force_download(mock_download, llm_instance):
    """Check that ``force_download=True`` is forwarded as ``force=True``."""
    llm_instance.download_model_with_progress(force_download=True)
    mock_download.assert_called_once_with(llm_instance.model_id, force=True)


# These tests are provided as examples. In real-world scenarios, you will need
# to adapt these tests to the actual logic of your `HuggingfaceLLM` class.
# For instance, "mock_model.delete.assert_called_once()" and similar lines are
# based on hypothetical methods and behaviors that you need to replace with
# actual implementations.