From fea0eeebc9a83ca53651843dd587f4a2563689fd Mon Sep 17 00:00:00 2001
From: Kye
Date: Fri, 3 Nov 2023 11:24:13 -0400
Subject: [PATCH] hugginface

---
 swarms/models/distilled_whisperx.py |   3 +
 swarms/models/huggingface.py        |  20 ++
 swarms/models/petals.py             |   2 +-
 tests/models/ada.py                 |  68 +++++++
 tests/models/huggingface.py         | 286 ++++++++++++++++++++++------
 5 files changed, 325 insertions(+), 54 deletions(-)
 create mode 100644 swarms/models/distilled_whisperx.py
 create mode 100644 tests/models/ada.py

diff --git a/swarms/models/distilled_whisperx.py b/swarms/models/distilled_whisperx.py
new file mode 100644
index 00000000..2eb2788d
--- /dev/null
+++ b/swarms/models/distilled_whisperx.py
@@ -0,0 +1,3 @@
+"""
+
+"""
\ No newline at end of file
diff --git a/swarms/models/huggingface.py b/swarms/models/huggingface.py
index f07edad3..437d9144 100644
--- a/swarms/models/huggingface.py
+++ b/swarms/models/huggingface.py
@@ -294,3 +294,23 @@ class HuggingfaceLLM:
         )
 
         print(dashboard)
+
+    def set_device(self, device):
+        """
+        Changes the device used for inference.
+
+        Parameters
+        ----------
+        device : str
+            The new device to use for inference.
+        """
+        self.device = device
+        self.model.to(self.device)
+
+    def set_max_length(self, max_length):
+        """Set max_length"""
+        self.max_length = max_length
+
+    def clear_chat_history(self):
+        """Clear chat history"""
+        self.chat_history = []
diff --git a/swarms/models/petals.py b/swarms/models/petals.py
index cc90cb62..189c2477 100644
--- a/swarms/models/petals.py
+++ b/swarms/models/petals.py
@@ -35,7 +35,7 @@ class Petals:
             "max_length": self.max_length,
         }
 
-    def generate(self, prompt):
+    def __call__(self, prompt):
         """Generate text using the Petals API."""
         params = self._default_params()
         inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
diff --git a/tests/models/ada.py b/tests/models/ada.py
new file mode 100644
index 00000000..786b162d
--- /dev/null
+++ b/tests/models/ada.py
@@ -0,0 +1,68 @@
+# test_embeddings.py
+
+import pytest
+import openai
+from unittest.mock import patch
+from swarms.models.simple_ada import get_ada_embeddings  # Adjust this import path to your project structure
+from os import getenv
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Fixture for test texts
+@pytest.fixture
+def test_texts():
+    return [
+        "Hello World",
+        "This is a test string with newline\ncharacters",
+        "A quick brown fox jumps over the lazy dog",
+    ]
+
+# Basic Test
+def test_get_ada_embeddings_basic(test_texts):
+    with patch('openai.Embedding.create') as mock_create:
+        # Mocking the OpenAI API call
+        mock_create.return_value = {
+            "data": [
+                {"embedding": [0.1, 0.2, 0.3]}
+            ]
+        }
+
+        for text in test_texts:
+            embedding = get_ada_embeddings(text)
+            assert embedding == [0.1, 0.2, 0.3], "Embedding does not match expected output"
+            mock_create.assert_called_with(input=[text.replace("\n", " ")], model="text-embedding-ada-002")
+
+# Parameterized Test
+@pytest.mark.parametrize(
+    "text, model, expected_call_model",
+    [
+        ("Hello World", "text-embedding-ada-002", "text-embedding-ada-002"),
+        ("Hello World", "text-embedding-ada-001", "text-embedding-ada-001"),
+    ],
+)
+def test_get_ada_embeddings_models(text, model, expected_call_model):
+    with patch('openai.Embedding.create') as mock_create:
+        mock_create.return_value = {
+            "data": [
+                {"embedding": [0.1, 0.2, 0.3]}
+            ]
+        }
+
+        _ = get_ada_embeddings(text, model=model)
+        mock_create.assert_called_with(input=[text], model=expected_call_model)
+
+# Exception Test
+def test_get_ada_embeddings_exception():
+    with patch('openai.Embedding.create') as mock_create:
+        mock_create.side_effect = openai.error.OpenAIError("Test error")
+        with pytest.raises(openai.error.OpenAIError):
+            get_ada_embeddings("Some text")
+
+# Tests for environment variable loading
+def test_env_var_loading(monkeypatch):
+    monkeypatch.setenv("OPENAI_API_KEY", "testkey123")
+    with patch('openai.Embedding.create'):
+        assert getenv("OPENAI_API_KEY") == "testkey123", "Environment variable for API key is not set correctly"
+
+# ... more tests to cover other aspects such as different input types, large inputs, invalid inputs, etc.
diff --git a/tests/models/huggingface.py b/tests/models/huggingface.py
index 46c7fa12..1bb44bed 100644
--- a/tests/models/huggingface.py
+++ b/tests/models/huggingface.py
@@ -1,58 +1,238 @@
-import pytest
 import torch
-from unittest.mock import Mock, patch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from swarms.models.huggingface import HuggingfaceLLM
+import pytest
+from unittest.mock import patch, MagicMock
+from swarms.models.huggingface import HuggingfaceLLM  # Replace with the actual import path
 
 
+# Fixture for the class instance
 @pytest.fixture
-def huggingface_llm():
-    # Create an instance of HuggingfaceLLM for testing.
+def llm_instance():
     model_id = "gpt2-small"
-    return HuggingfaceLLM(model_id=model_id)
-
-
-def test_initialization(huggingface_llm):
-    # Test the initialization of the HuggingfaceLLM class.
-    assert huggingface_llm.model_id == "gpt2-small"
-    assert huggingface_llm.device in ["cpu", "cuda"]
-    assert huggingface_llm.max_length == 20
-    assert huggingface_llm.verbose == False
-    assert huggingface_llm.distributed == False
-    assert huggingface_llm.decoding == False
-    assert huggingface_llm.model is None
-    assert huggingface_llm.tokenizer is None
-
-
-def test_load_model(huggingface_llm):
-    # Test loading the model.
-    huggingface_llm.load_model()
-    assert isinstance(huggingface_llm.model, AutoModelForCausalLM)
-    assert isinstance(huggingface_llm.tokenizer, AutoTokenizer)
-
-
-def test_run(huggingface_llm):
-    # Test the run method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm.run(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_call_method(huggingface_llm):
-    # Test the __call__ method of HuggingfaceLLM.
-    prompt_text = "Once upon a time"
-    generated_text = huggingface_llm(prompt_text)
-    assert isinstance(generated_text, str)
-    assert len(generated_text) > 0
-
-
-def test_load_model_failure():
-    # Test loading model failure.
-    with patch(
-        "your_module.AutoModelForCausalLM.from_pretrained",
-        side_effect=Exception("Model load failed"),
-    ):
-        with pytest.raises(Exception):
-            huggingface_llm = HuggingfaceLLM(model_id="gpt2-small")
-            huggingface_llm.load_model()
+    instance = HuggingfaceLLM(model_id=model_id)
+    return instance
+
+
+# Test for instantiation and attributes
+def test_llm_initialization(llm_instance):
+    assert llm_instance.model_id == "gpt2-small"
+    assert llm_instance.max_length == 500
+    # ... add more assertions for all default attributes
+
+
+# Parameterized test for setting devices
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_llm_set_device(llm_instance, device):
+    llm_instance.set_device(device)
+    assert llm_instance.device == device
+
+
+# Test exception during initialization with a bad model_id
+def test_llm_bad_model_initialization():
+    with pytest.raises(Exception):
+        HuggingfaceLLM(model_id="unknown-model")
+
+
+# Mocking the tokenizer and model to test run method
+@patch("swarms.models.huggingface.AutoTokenizer.from_pretrained")
+@patch("swarms.models.huggingface.AutoModelForCausalLM.from_pretrained")
+def test_llm_run(mock_model, mock_tokenizer, llm_instance):
+    mock_model.return_value.generate.return_value = "mocked output"
+    mock_tokenizer.return_value.encode.return_value = "mocked input"
+    result = llm_instance.run("test task")
+    assert result == "mocked output"
+
+
+# Async test (requires pytest-asyncio plugin)
+@pytest.mark.asyncio
+async def test_llm_run_async(llm_instance):
+    result = await llm_instance.run_async("test task")
+    assert isinstance(result, str)
+
+
+# Test for checking GPU availability
+def test_llm_gpu_availability(llm_instance):
+    # Assuming the test is running on a machine where the GPU availability is known
+    expected_result = torch.cuda.is_available()
+    assert llm_instance.gpu_available() == expected_result
+
+
+# Test for memory consumption reporting
+def test_llm_memory_consumption(llm_instance):
+    # Mocking torch.cuda functions for consistent results
+    with patch("torch.cuda.memory_allocated", return_value=1024):
+        with patch("torch.cuda.memory_reserved", return_value=2048):
+            memory = llm_instance.memory_consumption()
+    assert memory == {"allocated": 1024, "reserved": 2048}
+
+
+# Test different initialization parameters
+@pytest.mark.parametrize("model_id, max_length", [
+    ("gpt2-small", 100),
+    ("gpt2-medium", 200),
+    ("gpt2-large", None)  # None to check default behavior
+])
+def test_llm_initialization_params(model_id, max_length):
+    if max_length:
+        instance = HuggingfaceLLM(model_id=model_id, max_length=max_length)
+        assert instance.max_length == max_length
+    else:
+        instance = HuggingfaceLLM(model_id=model_id)
+        assert instance.max_length == 500  # Assuming 500 is the default max_length
+
+
+# Test for setting an invalid device
+def test_llm_set_invalid_device(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.set_device("quantum_processor")
+
+
+# Test for model download progress bar
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model")
+def test_llm_model_download_progress(mock_download, llm_instance):
+    llm_instance.download_model_with_progress()
+    mock_download.assert_called_once()
+
+
+# Mocking external API call to test run method without network
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_without_network(mock_run, llm_instance):
+    mock_run.return_value = "mocked output"
+    result = llm_instance.run("test task without network")
+    assert result == "mocked output"
+
+
+# Test handling of empty input for the run method
+def test_llm_run_empty_input(llm_instance):
+    with pytest.raises(ValueError):
+        llm_instance.run("")
+
+
+# Test the generation with a provided seed for reproducibility
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_with_seed(mock_run, llm_instance):
+    seed = 42
+    llm_instance.set_seed(seed)
+    # Assuming set_seed method affects the randomness in the model
+    # You would typically ensure that setting the seed gives reproducible results
+    mock_run.return_value = "mocked deterministic output"
+    result = llm_instance.run("test task", seed=seed)
+    assert result == "mocked deterministic output"
+
+
+# Test the output length is as expected
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_run_output_length(mock_run, llm_instance):
+    input_text = "test task"
+    llm_instance.max_length = 50  # set a max_length for the output
+    mock_run.return_value = "mocked output" * 10  # some long text
+    result = llm_instance.run(input_text)
+    assert len(result.split()) <= llm_instance.max_length
+
+
+# Test the tokenizer handling special tokens correctly
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.encode")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer.decode")
+def test_llm_tokenizer_special_tokens(mock_decode, mock_encode, llm_instance):
+    mock_encode.return_value = "encoded input with special tokens"
+    mock_decode.return_value = "decoded output with special tokens"
+    result = llm_instance.run("test task with special tokens")
+    mock_encode.assert_called_once()
+    mock_decode.assert_called_once()
+    assert "special tokens" in result
+
+
+# Test for correct handling of timeouts
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_timeout_handling(mock_run, llm_instance):
+    mock_run.side_effect = TimeoutError
+    with pytest.raises(TimeoutError):
+        llm_instance.run("test task with timeout")
+
+
+# Test for response time within a threshold (performance test)
+@patch("swarms.models.huggingface.HuggingfaceLLM.run")
+def test_llm_response_time(mock_run, llm_instance):
+    import time
+    mock_run.return_value = "mocked output"
+    start_time = time.time()
+    llm_instance.run("test task for response time")
+    end_time = time.time()
+    assert end_time - start_time < 1  # Assuming the response should be faster than 1 second
+
+
+# Test the logging of a warning for long inputs
+@patch("swarms.models.huggingface.logging.warning")
+def test_llm_long_input_warning(mock_warning, llm_instance):
+    long_input = "x" * 10000  # input longer than the typical limit
+    llm_instance.run(long_input)
+    mock_warning.assert_called_once()
+
+
+# Test for run method behavior when model raises an exception
+@patch("swarms.models.huggingface.HuggingfaceLLM._model.generate", side_effect=RuntimeError)
+def test_llm_run_model_exception(mock_generate, llm_instance):
+    with pytest.raises(RuntimeError):
+        llm_instance.run("test task when model fails")
+
+
+# Test the behavior when GPU is forced but not available
+@patch("torch.cuda.is_available", return_value=False)
+def test_llm_force_gpu_when_unavailable(mock_is_available, llm_instance):
+    with pytest.raises(EnvironmentError):
+        llm_instance.set_device("cuda")  # Attempt to set CUDA when it's not available
+
+
+# Test for proper cleanup after model use (releasing resources)
+@patch("swarms.models.huggingface.HuggingfaceLLM._model")
+@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer")
+def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance):
+    llm_instance.cleanup()
+    # Assuming cleanup method is meant to free resources
+    mock_model.delete.assert_called_once()
+    mock_tokenizer.delete.assert_called_once()
+
+
+# Test updating the configuration after instantiation
+def test_llm_update_configuration(llm_instance):
+    new_config = {"temperature": 0.7}
+    llm_instance.update_configuration(new_config)
+    assert llm_instance.configuration["temperature"] == 0.7
+
+
+# Test if the model is re-downloaded when changing the model_id
+@patch("swarms.models.huggingface.HuggingfaceLLM._download_model") +def test_llm_change_model_id(mock_download, llm_instance): + new_model_id = "gpt2-xl" + llm_instance.model_id = new_model_id + mock_download.assert_called_with(new_model_id) + + +# Test model's ability to handle multilingual input +@patch("swarms.models.huggingface.HuggingfaceLLM.run") +def test_llm_multilingual_input(mock_run, llm_instance): + mock_run.return_value = "mocked multilingual output" + multilingual_input = "Bonjour, ceci est un test multilingue." + result = llm_instance.run(multilingual_input) + assert isinstance(result, str) # Simple check to ensure output is string type + +# Test caching mechanism to prevent re-running the same inputs +@patch("swarms.models.huggingface.HuggingfaceLLM.run") +def test_llm_caching_mechanism(mock_run, llm_instance): + input_text = "test caching mechanism" + mock_run.return_value = "cached output" + # Run the input twice + first_run_result = llm_instance.run(input_text) + second_run_result = llm_instance.run(input_text) + mock_run.assert_called_once() # Should only be called once due to caching + assert first_run_result == second_run_result + + +# Ensure that model re-downloads when force_download flag is set +@patch("swarms.models.huggingface.HuggingfaceLLM._download_model") +def test_llm_force_download(mock_download, llm_instance): + llm_instance.download_model_with_progress(force_download=True) + mock_download.assert_called_once_with(llm_instance.model_id, force=True) + + +# These tests are provided as examples. In real-world scenarios, you will need to adapt these tests to the actual logic of your `HuggingfaceLLM` class. +# For instance, "mock_model.delete.assert_called_once()" and similar lines are based on hypothetical methods and behaviors that you need to replace with actual implementations.