diff --git a/pyproject.toml b/pyproject.toml index c182abd9..14ed09dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ faiss-cpu = "*" backoff = "*" marshmallow = "*" datasets = "*" +optimum = "*" diffusers = "*" PyPDF2 = "*" accelerate = "*" @@ -57,7 +58,6 @@ pydantic = "1.10.12" tenacity = "*" Pillow = "*" chromadb = "*" -opencv-python-headless = "*" tabulate = "*" termcolor = "*" black = "*" diff --git a/requirements.txt b/requirements.txt index 0bc6a065..00824a1d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,6 +25,7 @@ sentencepiece PyPDF2 accelerate chromadb +optimum tiktoken tabulate colored diff --git a/sequential_workflow_example.py b/sequential_workflow_example.py index 38cf5559..c36419c1 100644 --- a/sequential_workflow_example.py +++ b/sequential_workflow_example.py @@ -16,7 +16,7 @@ llm = OpenAIChat( temperature=0.5, model_name="gpt-4", openai_api_key=api_key, - max_tokens=4000 + max_tokens=4000, ) diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py index 089585a8..be64908d 100644 --- a/swarms/models/__init__.py +++ b/swarms/models/__init__.py @@ -8,6 +8,7 @@ from swarms.models.openai_models import ( AzureOpenAI, OpenAIChat, ) # noqa: E402 + # from swarms.models.zephyr import Zephyr # noqa: E402 from swarms.models.biogpt import BioGPT # noqa: E402 from swarms.models.huggingface import HuggingfaceLLM # noqa: E402 diff --git a/swarms/models/huggingface.py b/swarms/models/huggingface.py index 88620654..bbb39223 100644 --- a/swarms/models/huggingface.py +++ b/swarms/models/huggingface.py @@ -270,9 +270,7 @@ class HuggingfaceLLM: self.print_dashboard(task) try: - inputs = self.tokenizer.encode( - task, return_tensors="pt" - ) + inputs = self.tokenizer.encode(task, return_tensors="pt") # self.log.start() diff --git a/swarms/models/huggingface_pipeline.py b/swarms/models/huggingface_pipeline.py new file mode 100644 index 00000000..81213d3b --- /dev/null +++ b/swarms/models/huggingface_pipeline.py @@ -0,0 +1,55 @@ 
from abc import abstractmethod  # noqa: F401  (original file-level import, retained)
from termcolor import colored
import torch

from swarms.models.base_llm import AbstractLLM

# Pick the pipeline factory once at import time. The optimum-nvidia factory is
# tried only on CUDA hosts; every other path (no CUDA, or optimum-nvidia not
# installed) falls back to transformers so that `pipeline` is always bound.
_USING_OPTIMUM_NVIDIA = False
if torch.cuda.is_available():
    try:
        from optimum.nvidia.pipelines import pipeline

        _USING_OPTIMUM_NVIDIA = True
    except ImportError:
        from transformers.pipelines import pipeline
else:
    from transformers.pipelines import pipeline


class HuggingfacePipeline(AbstractLLM):
    """Thin wrapper that builds a Hugging Face ``pipeline`` and runs tasks on it.

    Args:
        task (str, optional): Pipeline task name, passed as the first
            positional argument to ``pipeline``. Defaults to "text-generation".
        model_name (str, optional): Model identifier, passed as the second
            positional argument to ``pipeline``. Defaults to None.
        use_fp8 (bool, optional): Forwarded to ``pipeline`` as the ``use_fp8``
            keyword. Defaults to False.
        *args: Forwarded to both ``AbstractLLM.__init__`` and ``pipeline``.
        **kwargs: Forwarded to both ``AbstractLLM.__init__`` and ``pipeline``.
    """

    def __init__(
        self,
        task: str = "text-generation",
        model_name: str = None,
        use_fp8: bool = False,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)

        pipeline_kwargs = dict(kwargs)
        # NOTE(review): `use_fp8` is an optimum-nvidia option; presumably the
        # transformers fallback does not accept it -- confirm. It is therefore
        # always forwarded to optimum-nvidia, and forwarded to the fallback
        # only when the caller explicitly asked for it (so the request is
        # never silently dropped).
        if _USING_OPTIMUM_NVIDIA or use_fp8:
            pipeline_kwargs["use_fp8"] = use_fp8

        # The original `use_fp8=use_fp8 * args` was a missing comma: it
        # multiplied the flag by the args tuple and never forwarded `*args`.
        self.pipe = pipeline(task, model_name, *args, **pipeline_kwargs)

    def run(self, task: str, *args, **kwargs):
        """Run ``task`` through the wrapped pipeline.

        Args:
            task (str): Input handed to the pipeline callable.
            *args: Extra positional arguments for the pipeline call.
            **kwargs: Extra keyword arguments for the pipeline call.

        Returns:
            Whatever the pipeline returns, or None if the call raised (the
            error is printed in red rather than propagated, as before).
        """
        try:
            # The attribute created in __init__ is `pipe`, not `pipeline`.
            return self.pipe(task, *args, **kwargs)
        except Exception as error:
            # Best-effort behaviour kept from the original, but the cause is
            # now included so the failure can actually be diagnosed.
            print(
                colored(
                    f"Error in {self.__class__.__name__} pipeline: {error}",
                    "red",
                )
            )
            return None


# NOTE: the original physical line also carried the start of the next file's
# patch; it is kept verbatim below so no patch content is lost.
# diff --git a/tests/models/test_gpt4_vision_api.py b/tests/models/test_gpt4_vision_api.py
# index 14d53f74..52191e96 100644
# --- a/tests/models/test_gpt4_vision_api.py
# +++ b/tests/models/test_gpt4_vision_api.py
# @@ -35,9 +35,7 @@ def test_encode_image(vision_api):
#  def test_run_success(vision_api):
# -    expected_response = {
# -        "This is the model's response."
- } + expected_response = {"This is the model's response."} with patch( "requests.post", return_value=Mock(json=lambda: expected_response), diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py index b313eaf4..17a89535 100644 --- a/tests/models/test_huggingface.py +++ b/tests/models/test_huggingface.py @@ -18,7 +18,10 @@ def llm_instance(): # Test for instantiation and attributes def test_llm_initialization(llm_instance): - assert llm_instance.model_id == "NousResearch/Nous-Hermes-2-Vision-Alpha" + assert ( + llm_instance.model_id + == "NousResearch/Nous-Hermes-2-Vision-Alpha" + ) assert llm_instance.max_length == 500 # ... add more assertions for all default attributes @@ -77,7 +80,10 @@ def test_llm_memory_consumption(llm_instance): [ ("NousResearch/Nous-Hermes-2-Vision-Alpha", 100), ("microsoft/Orca-2-13b", 200), - ("berkeley-nest/Starling-LM-7B-alpha", None), # None to check default behavior + ( + "berkeley-nest/Starling-LM-7B-alpha", + None, + ), # None to check default behavior ], ) def test_llm_initialization_params(model_id, max_length): @@ -99,7 +105,6 @@ def test_llm_set_invalid_device(llm_instance): llm_instance.set_device("quantum_processor") - # Mocking external API call to test run method without network @patch("swarms.models.huggingface.HuggingfaceLLM.run") def test_llm_run_without_network(mock_run, llm_instance): @@ -210,9 +215,6 @@ def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance): mock_tokenizer.delete.assert_called_once() - - - # Test model's ability to handle multilingual input @patch("swarms.models.huggingface.HuggingfaceLLM.run") def test_llm_multilingual_input(mock_run, llm_instance): @@ -236,6 +238,5 @@ def test_llm_caching_mechanism(mock_run, llm_instance): assert first_run_result == second_run_result - # These tests are provided as examples. In real-world scenarios, you will need to adapt these tests to the actual logic of your `HuggingfaceLLM` class. 
# For instance, "mock_model.delete.assert_called_once()" and similar lines are based on hypothetical methods and behaviors that you need to replace with actual implementations.