pull/91/head
Kye 1 year ago
parent 8dfb1d33d0
commit fea0eeebc9

@@ -294,3 +294,23 @@ class HuggingfaceLLM:
            )
        print(dashboard)

    def set_device(self, device):
        """
        Changes the device used for inference.

        Parameters
        ----------
        device : str
            The new device to use for inference.
        """
        self.device = device
        self.model.to(self.device)

    def set_max_length(self, max_length):
        """Set max_length"""
        self.max_length = max_length

    def clear_chat_history(self):
        """Clear chat history"""
        self.chat_history = []
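
A quick usage sketch of the new helpers (hedged: it assumes the existing `HuggingfaceLLM` constructor and `run()` method shown in the tests below, and that the underlying model is already loaded before `set_device` is called, since `set_device` moves `self.model`; the model id is only illustrative):

from swarms.models.huggingface import HuggingfaceLLM

llm = HuggingfaceLLM(model_id="gpt2")  # illustrative model id
llm.set_max_length(256)      # cap generated sequence length
llm.set_device("cuda")       # moves self.model, so the model must already be loaded
print(llm.run("Once upon a time"))
llm.clear_chat_history()     # drop any accumulated chat history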

@@ -35,7 +35,7 @@ class Petals:
            "max_length": self.max_length,
        }

-    def generate(self, prompt):
+    def __call__(self, prompt):
        """Generate text using the Petals API."""
        params = self._default_params()
        inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
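
Renaming `generate` to `__call__` lets callers invoke the Petals wrapper directly as a function. A minimal sketch of the new call style (the constructor argument below is hypothetical, not taken from the class):

petals = Petals(model_name="bigscience/bloom-petals")  # hypothetical constructor argument
output = petals("Once upon a time")                    # previously: petals.generate(...)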

@@ -0,0 +1,68 @@
# test_embeddings.py
import pytest
import openai
from unittest.mock import patch
from swarms.models.simple_ada import get_ada_embeddings  # Adjust this import path to your project structure
from os import getenv
from dotenv import load_dotenv

load_dotenv()


# Fixture for test texts
@pytest.fixture
def test_texts():
    return [
        "Hello World",
        "This is a test string with newline\ncharacters",
        "A quick brown fox jumps over the lazy dog",
    ]


# Basic Test
def test_get_ada_embeddings_basic(test_texts):
    with patch('openai.Embedding.create') as mock_create:
        # Mocking the OpenAI API call
        mock_create.return_value = {
            "data": [
                {"embedding": [0.1, 0.2, 0.3]}
            ]
        }

        for text in test_texts:
            embedding = get_ada_embeddings(text)
            assert embedding == [0.1, 0.2, 0.3], "Embedding does not match expected output"
            mock_create.assert_called_with(input=[text.replace("\n", " ")], model="text-embedding-ada-002")


# Parameterized Test
@pytest.mark.parametrize(
    "text, model, expected_call_model",
    [
        ("Hello World", "text-embedding-ada-002", "text-embedding-ada-002"),
        ("Hello World", "text-embedding-ada-001", "text-embedding-ada-001"),
    ],
)
def test_get_ada_embeddings_models(text, model, expected_call_model):
    with patch('openai.Embedding.create') as mock_create:
        mock_create.return_value = {
            "data": [
                {"embedding": [0.1, 0.2, 0.3]}
            ]
        }
        _ = get_ada_embeddings(text, model=model)
        mock_create.assert_called_with(input=[text], model=expected_call_model)


# Exception Test
def test_get_ada_embeddings_exception():
    with patch('openai.Embedding.create') as mock_create:
        mock_create.side_effect = openai.error.OpenAIError("Test error")
        with pytest.raises(openai.error.OpenAIError):
            get_ada_embeddings("Some text")


# Tests for environment variable loading
def test_env_var_loading(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "testkey123")
    with patch('openai.Embedding.create'):
        assert getenv("OPENAI_API_KEY") == "testkey123", "Environment variable for API key is not set correctly"


# ... more tests to cover other aspects such as different input types, large inputs, invalid inputs, etc.
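
For reference, an implementation consistent with what these tests assert could look like the sketch below. The real `swarms.models.simple_ada.get_ada_embeddings` may differ; this only mirrors the mocked `openai.Embedding.create` call, the expected newline stripping, and the way the tests read the first embedding from the response:

import openai

def get_ada_embeddings(text: str, model: str = "text-embedding-ada-002"):
    # The tests expect newlines to be replaced with spaces before the API call
    text = text.replace("\n", " ")
    response = openai.Embedding.create(input=[text], model=model)
    # The tests compare against the first embedding in the "data" field
    return response["data"][0]["embedding"]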

@@ -1,58 +1,238 @@
-import pytest
-import torch
-from unittest.mock import Mock, patch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from swarms.models.huggingface import HuggingfaceLLM
-
-
-@pytest.fixture
-def huggingface_llm():
-    # Create an instance of HuggingfaceLLM for testing.
-    model_id = "gpt2-small"
-    return HuggingfaceLLM(model_id=model_id)
-
-
-def test_initialization(huggingface_llm):
-    # Test the initialization of the HuggingfaceLLM class.
-    assert huggingface_llm.model_id == "gpt2-small"
-    assert huggingface_llm.device in ["cpu", "cuda"]
-    assert huggingface_llm.max_length == 20
-    assert huggingface_llm.verbose == False
-    assert huggingface_llm.distributed == False
-    assert huggingface_llm.decoding == False
-    assert huggingface_llm.model is None
-    assert huggingface_llm.tokenizer is None
-
-
-def test_load_model(huggingface_llm):
-    # Test loading the model.
-    huggingface_llm.load_model()
-    assert isinstance(huggingface_llm.model, AutoModelForCausalLM)
-    assert isinstance(huggingface_llm.tokenizer, AutoTokenizer)
-
-
-def test_run(huggingface_llm):
-    # Test the run method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm.run(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_call_method(huggingface_llm):
-    # Test the __call__ method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_load_model_failure():
-    # Test loading model failure.
-    with patch(
-        "your_module.AutoModelForCausalLM.from_pretrained",
-        side_effect=Exception("Model load failed"),
-    ):
-        with pytest.raises(Exception):
-            huggingface_llm = HuggingfaceLLM(model_id="gpt2-small")
-            huggingface_llm.load_model()
+import torch
+import pytest
+from unittest.mock import patch, MagicMock
+from swarms.models.huggingface import HuggingfaceLLM  # Replace with the actual import path
+
+
+# Fixture for the class instance
+@pytest.fixture
+def llm_instance():
+    model_id = "gpt2-small"
+    instance = HuggingfaceLLM(model_id=model_id)
+    return instance
+
+
+# Test for instantiation and attributes
+def test_llm_initialization(llm_instance):
+    assert llm_instance.model_id == "gpt2-small"
+    assert llm_instance.max_length == 500
+    # ... add more assertions for all default attributes
+
+
+# Parameterized test for setting devices
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_llm_set_device(llm_instance, device):
+    llm_instance.set_device(device)
+    assert llm_instance.device == device
+
+
+# Test exception during initialization with a bad model_id
+def test_llm_bad_model_initialization():
+    with pytest.raises(Exception):
+        HuggingfaceLLM(model_id="unknown-model")
+
+
+# Mocking the tokenizer and model to test run method
+@patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
+@patch("swarms.models.huggingface.AutoModelForCausalLM.from_pretrained")
+def test_llm_run(mock_model, mock_tokenizer, llm_instance):
+    mock_model.return_value.generate.return_value = "mocked output"
+    mock_tokenizer.return_value.encode.return_value = "mocked input"
+    result = llm_instance.run("test task")
+    assert result == "mocked output"
+
+
+# Async test (requires pytest-asyncio plugin)
+@pytest.mark.asyncio
+async def test_llm_run_async(llm_instance):
+    result = await llm_instance.run_async("test task")
+    assert isinstance(result, str)
+
+
+# Test for checking GPU availability
+def test_llm_gpu_availability(llm_instance):
+    # Assuming the test is running on a machine where the GPU availability is known
+    expected_result = torch.cuda.is_available()
+    assert llm_instance.gpu_available() == expected_result
+
+
+# Test for memory consumption reporting
+def test_llm_memory_consumption(llm_instance):
+    # Mocking torch.cuda functions for consistent results
+    with patch("torch.cuda.memory_allocated", return_value=1024):
+        with patch("torch.cuda.memory_reserved", return_value=2048):
+            memory = llm_instance.memory_consumption()
+            assert memory == {"allocated": 1024, "reserved": 2048}
+
+
+# Test different initialization parameters
+@pytest.mark.parametrize("model_id, max_length", [
+    ("gpt2-small", 100),
+    ("gpt2-medium", 200),
+    ("gpt2-large", None)  # None to check default behavior
+])
+def test_llm_initialization_params(model_id, max_length):
+    if max_length:
+        instance = HuggingfaceLLM(model_id=model_id, max_length=max_length)
+        assert instance.max_length == max_length
+    else:
+        instance = HuggingfaceLLM(model_id=model_id)
+        assert instance.max_length == 500  # Assuming 500 is the default max_length
+
+
+# Test for setting an invalid device
+def test_llm_set_invalid_device(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.set_device("quantum_processor")
+
+
+# Test for model download progress bar
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_model_download_progress(mock_download, llm_instance):
+    llm_instance.download_model_with_progress()
+    mock_download.assert_called_once()
+
+
+# Mocking external API call to test run method without network
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_without_network(mock_run, llm_instance):
+    mock_run.return_value = "mocked output"
+    result = llm_instance.run("test task without network")
+    assert result == "mocked output"
+
+
+# Test handling of empty input for the run method
+def test_llm_run_empty_input(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.run("")
+
+
+# Test the generation with a provided seed for reproducibility
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_with_seed(mock_run, llm_instance):
+    seed = 42
+    llm_instance.set_seed(seed)
+    # Assuming set_seed method affects the randomness in the model
+    # You would typically ensure that setting the seed gives reproducible results
+    mock_run.return_value = "mocked deterministic output"
+    result = llm_instance.run("test task", seed=seed)
+    assert result == "mocked deterministic output"
+
+
+# Test the output length is as expected
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_output_length(mock_run, llm_instance):
+    input_text = "test task"
+    llm_instance.max_length = 50  # set a max_length for the output
+    mock_run.return_value = "mocked output" * 10  # some long text
+    result = llm_instance.run(input_text)
+    assert len(result.split()) <= llm_instance.max_length
+
+
+# Test the tokenizer handling special tokens correctly
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.encode")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.decode")
+def test_llm_tokenizer_special_tokens(mock_decode, mock_encode, llm_instance):
+    mock_encode.return_value = "encoded input with special tokens"
+    mock_decode.return_value = "decoded output with special tokens"
+    result = llm_instance.run("test task with special tokens")
+    mock_encode.assert_called_once()
+    mock_decode.assert_called_once()
+    assert "special tokens" in result
+
+
+# Test for correct handling of timeouts
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_timeout_handling(mock_run, llm_instance):
+    mock_run.side_effect = TimeoutError
+    with pytest.raises(TimeoutError):
+        llm_instance.run("test task with timeout")
+
+
+# Test for response time within a threshold (performance test)
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_response_time(mock_run, llm_instance):
+    import time
+
+    mock_run.return_value = "mocked output"
+    start_time = time.time()
+    llm_instance.run("test task for response time")
+    end_time = time.time()
+    assert end_time - start_time < 1  # Assuming the response should be faster than 1 second
+
+
+# Test the logging of a warning for long inputs
+@patch("swarms.models.huggingface.logging.warning")
+def test_llm_long_input_warning(mock_warning, llm_instance):
+    long_input = "x" * 10000  # input longer than the typical limit
+    llm_instance.run(long_input)
+    mock_warning.assert_called_once()
+
+
+# Test for run method behavior when model raises an exception
+@patch("swarms.models.huggingface.HuggingfaceLLM._model.generate", side_effect=RuntimeError)
+def test_llm_run_model_exception(mock_generate, llm_instance):
+    with pytest.raises(RuntimeError):
+        llm_instance.run("test task when model fails")
+
+
+# Test the behavior when GPU is forced but not available
+@patch("torch.cuda.is_available", return_value=False)
+def test_llm_force_gpu_when_unavailable(mock_is_available, llm_instance):
+    with pytest.raises(EnvironmentError):
+        llm_instance.set_device("cuda")  # Attempt to set CUDA when it's not available
+
+
+# Test for proper cleanup after model use (releasing resources)
+@patch("swarms.models.huggingface.HuggingfaceLLM._model")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer")
+def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance):
+    llm_instance.cleanup()
+    # Assuming cleanup method is meant to free resources
+    mock_model.delete.assert_called_once()
+    mock_tokenizer.delete.assert_called_once()
+
+
+# Test updating the configuration after instantiation
+def test_llm_update_configuration(llm_instance):
+    new_config = {"temperature": 0.7}
+    llm_instance.update_configuration(new_config)
+    assert llm_instance.configuration["temperature"] == 0.7
+
+
+# Test if the model is re-downloaded when changing the model_id
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_change_model_id(mock_download, llm_instance):
+    new_model_id = "gpt2-xl"
+    llm_instance.model_id = new_model_id
+    mock_download.assert_called_with(new_model_id)
+
+
+# Test model's ability to handle multilingual input
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_multilingual_input(mock_run, llm_instance):
+    mock_run.return_value = "mocked multilingual output"
+    multilingual_input = "Bonjour, ceci est un test multilingue."
+    result = llm_instance.run(multilingual_input)
+    assert isinstance(result, str)  # Simple check to ensure output is string type
+
+
+# Test caching mechanism to prevent re-running the same inputs
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_caching_mechanism(mock_run, llm_instance):
+    input_text = "test caching mechanism"
+    mock_run.return_value = "cached output"
+    # Run the input twice
+    first_run_result = llm_instance.run(input_text)
+    second_run_result = llm_instance.run(input_text)
+    mock_run.assert_called_once()  # Should only be called once due to caching
+    assert first_run_result == second_run_result
+
+
+# Ensure that model re-downloads when force_download flag is set
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_force_download(mock_download, llm_instance):
+    llm_instance.download_model_with_progress(force_download=True)
+    mock_download.assert_called_once_with(llm_instance.model_id, force=True)
+
+
+# These tests are provided as examples. In real-world scenarios, you will need to adapt these tests to the actual logic of your `HuggingfaceLLM` class.
+# For instance, "mock_model.delete.assert_called_once()" and similar lines are based on hypothetical methods and behaviors that you need to replace with actual implementations.
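
As the closing note above says, several of these tests target methods that `HuggingfaceLLM` may not implement yet. As one example, a `memory_consumption` helper consistent with the mocked test above could be sketched as follows (this method is assumed, not confirmed, to exist on the class):

import torch

def memory_consumption(self):
    # Hypothetical HuggingfaceLLM method: report GPU memory usage, matching the
    # patched torch.cuda.memory_allocated / memory_reserved values in the test above.
    return {
        "allocated": torch.cuda.memory_allocated(),
        "reserved": torch.cuda.memory_reserved(),
    }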
