From fea0eeebc9a83ca53651843dd587f4a2563689fd Mon Sep 17 00:00:00 2001
From: Kye
Date: Fri, 3 Nov 2023 11:24:13 -0400
Subject: [PATCH] hugginface

---
 swarms/models/distilled_whisperx.py |   3 +
 swarms/models/huggingface.py        |  20 ++
 swarms/models/petals.py             |   2 +-
 tests/models/ada.py                 |  68 +++++++
 tests/models/huggingface.py         | 286 ++++++++++++++++++++++------
 5 files changed, 325 insertions(+), 54 deletions(-)
 create mode 100644 swarms/models/distilled_whisperx.py
 create mode 100644 tests/models/ada.py

diff --git a/swarms/models/distilled_whisperx.py b/swarms/models/distilled_whisperx.py
new file mode 100644
index 00000000..2eb2788d
--- /dev/null
+++ b/swarms/models/distilled_whisperx.py
@@ -0,0 +1,3 @@
+"""
+
+"""
\ No newline at end of file
diff --git a/swarms/models/huggingface.py b/swarms/models/huggingface.py
index f07edad3..437d9144 100644
--- a/swarms/models/huggingface.py
+++ b/swarms/models/huggingface.py
@@ -294,3 +294,23 @@ class HuggingfaceLLM:
         )
 
         print(dashboard)
+
+    def set_device(self, device):
+        """
+        Changes the device used for inference.
+
+        Parameters
+        ----------
+        device : str
+            The new device to use for inference.
+        """
+        self.device = device
+        self.model.to(self.device)
+
+    def set_max_length(self, max_length):
+        """Set max_length"""
+        self.max_length = max_length
+
+    def clear_chat_history(self):
+        """Clear chat history"""
+        self.chat_history = []
diff --git a/swarms/models/petals.py b/swarms/models/petals.py
index cc90cb62..189c2477 100644
--- a/swarms/models/petals.py
+++ b/swarms/models/petals.py
@@ -35,7 +35,7 @@ class Petals:
             "max_length": self.max_length,
         }
 
-    def generate(self, prompt):
+    def __call__(self, prompt):
         """Generate text using the Petals API."""
         params = self._default_params()
         inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
diff --git a/tests/models/ada.py b/tests/models/ada.py
new file mode 100644
index 00000000..786b162d
--- /dev/null
+++ b/tests/models/ada.py
@@ -0,0 +1,68 @@
+# test_embeddings.py
+
+import pytest
+import openai
+from unittest.mock import patch
+from swarms.models.simple_ada import get_ada_embeddings  # Adjust this import path to your project structure
+from os import getenv
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Fixture for test texts
+@pytest.fixture
+def test_texts():
+    return [
+        "Hello World",
+        "This is a test string with newline\ncharacters",
+        "A quick brown fox jumps over the lazy dog",
+    ]
+
+# Basic Test
+def test_get_ada_embeddings_basic(test_texts):
+    with patch('openai.Embedding.create') as mock_create:
+        # Mocking the OpenAI API call
+        mock_create.return_value = {
+            "data": [
+                {"embedding": [0.1, 0.2, 0.3]}
+            ]
+        }
+
+        for text in test_texts:
+            embedding = get_ada_embeddings(text)
+            assert embedding == [0.1, 0.2, 0.3], "Embedding does not match expected output"
+            mock_create.assert_called_with(input=[text.replace("\n", " ")], model="text-embedding-ada-002")
+
+# Parameterized Test
+@pytest.mark.parametrize(
+    "text, model, expected_call_model",
+    [
+        ("Hello World", "text-embedding-ada-002", "text-embedding-ada-002"),
+        ("Hello World", "text-embedding-ada-001", "text-embedding-ada-001"),
+    ],
+)
+def test_get_ada_embeddings_models(text, model, expected_call_model):
+    with patch('openai.Embedding.create') as mock_create:
+        mock_create.return_value = {
+            "data": [
+                {"embedding": [0.1, 0.2, 0.3]}
+            ]
+        }
+
+        _ = get_ada_embeddings(text, model=model)
+        mock_create.assert_called_with(input=[text], model=expected_call_model)
+
+# Exception Test
+def test_get_ada_embeddings_exception():
+    with patch('openai.Embedding.create') as mock_create:
+        mock_create.side_effect = openai.error.OpenAIError("Test error")
+        with pytest.raises(openai.error.OpenAIError):
+            get_ada_embeddings("Some text")
+
+# Tests for environment variable loading
+def test_env_var_loading(monkeypatch):
+    monkeypatch.setenv("OPENAI_API_KEY", "testkey123")
+    with patch('openai.Embedding.create'):
+        assert getenv("OPENAI_API_KEY") == "testkey123", "Environment variable for API key is not set correctly"
+
+# ... more tests to cover other aspects such as different input types, large inputs, invalid inputs, etc.
diff --git a/tests/models/huggingface.py b/tests/models/huggingface.py
index 46c7fa12..1bb44bed 100644
--- a/tests/models/huggingface.py
+++ b/tests/models/huggingface.py
@@ -1,58 +1,238 @@
-import pytest
 import torch
-from unittest.mock import Mock, patch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from swarms.models.huggingface import HuggingfaceLLM
+import pytest
+from unittest.mock import patch, MagicMock
+from swarms.models.huggingface import HuggingfaceLLM  # Replace with the actual import path
 
 
+# Fixture for the class instance
 @pytest.fixture
-def huggingface_llm():
-    # Create an instance of HuggingfaceLLM for testing.
+def llm_instance():
     model_id = "gpt2-small"
-    return HuggingfaceLLM(model_id=model_id)
-
-
-def test_initialization(huggingface_llm):
-    # Test the initialization of the HuggingfaceLLM class.
-    assert huggingface_llm.model_id == "gpt2-small"
-    assert huggingface_llm.device in ["cpu", "cuda"]
-    assert huggingface_llm.max_length == 20
-    assert huggingface_llm.verbose == False
-    assert huggingface_llm.distributed == False
-    assert huggingface_llm.decoding == False
-    assert huggingface_llm.model is None
-    assert huggingface_llm.tokenizer is None
-
-
-def test_load_model(huggingface_llm):
-    # Test loading the model.
-    huggingface_llm.load_model()
-    assert isinstance(huggingface_llm.model, AutoModelForCausalLM)
-    assert isinstance(huggingface_llm.tokenizer, AutoTokenizer)
-
-
-def test_run(huggingface_llm):
-    # Test the run method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm.run(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_call_method(huggingface_llm):
-    # Test the __call__ method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_load_model_failure():
-    # Test loading model failure.
-    with patch(
-        "your_module.AutoModelForCausalLM.from_pretrained",
-        side_effect=Exception("Model load failed"),
-    ):
-        with pytest.raises(Exception):
-            huggingface_llm = HuggingfaceLLM(model_id="gpt2-small")
-            huggingface_llm.load_model()
+    instance = HuggingfaceLLM(model_id=model_id)
+    return instance
+
+
+# Test for instantiation and attributes
+def test_llm_initialization(llm_instance):
+    assert llm_instance.model_id == "gpt2-small"
+    assert llm_instance.max_length == 500
+    # ... add more assertions for all default attributes
+
+
+# Parameterized test for setting devices
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_llm_set_device(llm_instance, device):
+    llm_instance.set_device(device)
+    assert llm_instance.device == device
+
+
+# Test exception during initialization with a bad model_id
+def test_llm_bad_model_initialization():
+    with pytest.raises(Exception):
+        HuggingfaceLLM(model_id="unknown-model")
+
+
+# Mocking the tokenizer and model to test run method
+@patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
+@patch("swarms.models.huggingface.AutoModelForCausalLM.from_pretrained")
+def test_llm_run(mock_model, mock_tokenizer, llm_instance):
+    mock_model.return_value.generate.return_value = "mocked output"
+    mock_tokenizer.return_value.encode.return_value = "mocked input"
+    result = llm_instance.run("test task")
+    assert result == "mocked output"
+
+
+# Async test (requires pytest-asyncio plugin)
+@pytest.mark.asyncio
+async def test_llm_run_async(llm_instance):
+    result = await llm_instance.run_async("test task")
+    assert isinstance(result, str)
+
+
+# Test for checking GPU availability
+def test_llm_gpu_availability(llm_instance):
+    # Assuming the test is running on a machine where the GPU availability is known
+    expected_result = torch.cuda.is_available()
+    assert llm_instance.gpu_available() == expected_result
+
+
+# Test for memory consumption reporting
+def test_llm_memory_consumption(llm_instance):
+    # Mocking torch.cuda functions for consistent results
+    with patch("torch.cuda.memory_allocated", return_value=1024):
+        with patch("torch.cuda.memory_reserved", return_value=2048):
+            memory = llm_instance.memory_consumption()
+    assert memory == {"allocated": 1024, "reserved": 2048}
+
+
+# Test different initialization parameters
+@pytest.mark.parametrize("model_id, max_length", [
+    ("gpt2-small", 100),
+    ("gpt2-medium", 200),
+    ("gpt2-large", None)  # None to check default behavior
+])
+def test_llm_initialization_params(model_id, max_length):
+    if max_length:
+        instance = HuggingfaceLLM(model_id=model_id, max_length=max_length)
+        assert instance.max_length == max_length
+    else:
+        instance = HuggingfaceLLM(model_id=model_id)
+        assert instance.max_length == 500  # Assuming 500 is the default max_length
+
+
+# Test for setting an invalid device
+def test_llm_set_invalid_device(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.set_device("quantum_processor")
+
+
+# Test for model download progress bar
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_model_download_progress(mock_download, llm_instance):
+    llm_instance.download_model_with_progress()
+    mock_download.assert_called_once()
+
+
+# Mocking external API call to test run method without network
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_without_network(mock_run, llm_instance):
+    mock_run.return_value = "mocked output"
+    result = llm_instance.run("test task without network")
+    assert result == "mocked output"
+
+
+# Test handling of empty input for the run method
+def test_llm_run_empty_input(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.run("")
+
+
+# Test the generation with a provided seed for reproducibility
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_with_seed(mock_run, llm_instance):
+    seed = 42
+    llm_instance.set_seed(seed)
+    # Assuming set_seed method affects the randomness in the model
+    # You would typically ensure that setting the seed gives reproducible results
+    mock_run.return_value = "mocked deterministic output"
+    result = llm_instance.run("test task", seed=seed)
+    assert result == "mocked deterministic output"
+
+
+# Test the output length is as expected
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_output_length(mock_run, llm_instance):
+    input_text = "test task"
+    llm_instance.max_length = 50  # set a max_length for the output
+    mock_run.return_value = "mocked output" * 10  # some long text
+    result = llm_instance.run(input_text)
+    assert len(result.split()) <= llm_instance.max_length
+
+
+# Test the tokenizer handling special tokens correctly
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.encode")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.decode")
+def test_llm_tokenizer_special_tokens(mock_decode, mock_encode, llm_instance):
+    mock_encode.return_value = "encoded input with special tokens"
+    mock_decode.return_value = "decoded output with special tokens"
+    result = llm_instance.run("test task with special tokens")
+    mock_encode.assert_called_once()
+    mock_decode.assert_called_once()
+    assert "special tokens" in result
+
+
+# Test for correct handling of timeouts
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_timeout_handling(mock_run, llm_instance):
+    mock_run.side_effect = TimeoutError
+    with pytest.raises(TimeoutError):
+        llm_instance.run("test task with timeout")
+
+
+# Test for response time within a threshold (performance test)
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_response_time(mock_run, llm_instance):
+    import time
+    mock_run.return_value = "mocked output"
+    start_time = time.time()
+    llm_instance.run("test task for response time")
+    end_time = time.time()
+    assert end_time - start_time < 1  # Assuming the response should be faster than 1 second
+
+
+# Test the logging of a warning for long inputs
+@patch("swarms.models.huggingface.logging.warning")
+def test_llm_long_input_warning(mock_warning, llm_instance):
+    long_input = "x" * 10000  # input longer than the typical limit
+    llm_instance.run(long_input)
+    mock_warning.assert_called_once()
+
+
+# Test for run method behavior when model raises an exception
+@patch("swarms.models.huggingface.HuggingfaceLLM._model.generate", side_effect=RuntimeError)
+def test_llm_run_model_exception(mock_generate, llm_instance):
+    with pytest.raises(RuntimeError):
+        llm_instance.run("test task when model fails")
+
+
+# Test the behavior when GPU is forced but not available
+@patch("torch.cuda.is_available", return_value=False)
+def test_llm_force_gpu_when_unavailable(mock_is_available, llm_instance):
+    with pytest.raises(EnvironmentError):
+        llm_instance.set_device("cuda")  # Attempt to set CUDA when it's not available
+
+
+# Test for proper cleanup after model use (releasing resources)
+@patch("swarms.models.huggingface.HuggingfaceLLM._model")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer")
+def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance):
+    llm_instance.cleanup()
+    # Assuming cleanup method is meant to free resources
+    mock_model.delete.assert_called_once()
+    mock_tokenizer.delete.assert_called_once()
+
+
+# Test updating the configuration after instantiation
+def test_llm_update_configuration(llm_instance):
+    new_config = {"temperature": 0.7}
+    llm_instance.update_configuration(new_config)
+    assert llm_instance.configuration["temperature"] == 0.7
+
+
+# Test if the model is re-downloaded when changing the model_id
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model") +def test_llm_change_model_id(mock_download, llm_instance): + new_model_id = "gpt2-xl" + llm_instance.model_id = new_model_id + mock_download.assert_called_with(new_model_id) + + +# Test model's ability to handle multilingual input +@patch("swarms.models.huggingface.HuggingfaceLLM.run") +def test_llm_multilingual_input(mock_run, llm_instance): + mock_run.return_value = "mocked multilingual output" + multilingual_input = "Bonjour, ceci est un test multilingue." + result = llm_instance.run(multilingual_input) + assert isinstance(result, str) # Simple check to ensure output is string type + +# Test caching mechanism to prevent re-running the same inputs +@patch("swarms.models.huggingface.HuggingfaceLLM.run") +def test_llm_caching_mechanism(mock_run, llm_instance): + input_text = "test caching mechanism" + mock_run.return_value = "cached output" + # Run the input twice + first_run_result = llm_instance.run(input_text) + second_run_result = llm_instance.run(input_text) + mock_run.assert_called_once() # Should only be called once due to caching + assert first_run_result == second_run_result + + +# Ensure that model re-downloads when force_download flag is set +@patch("swarms.models.huggingface.HuggingfaceLLM._download_model") +def test_llm_force_download(mock_download, llm_instance): + llm_instance.download_model_with_progress(force_download=True) + mock_download.assert_called_once_with(llm_instance.model_id, force=True) + + +# These tests are provided as examples. In real-world scenarios, you will need to adapt these tests to the actual logic of your `HuggingfaceLLM` class. +# For instance, "mock_model.delete.assert_called_once()" and similar lines are based on hypothetical methods and behaviors that you need to replace with actual implementations.