## `HuggingfaceLLM` Documentation
### Introduction
The `HuggingfaceLLM` class is designed for running inference using models from the Hugging Face Transformers library. This documentation provides an in-depth understanding of the class, its purpose, attributes, methods, and usage examples.
#### Purpose
The `HuggingfaceLLM` class serves the following purposes:
1. Load pre-trained Hugging Face models and tokenizers.
2. Generate text-based responses from the loaded model using a given prompt.
3. Provide flexibility in device selection, quantization, and other configuration options.
### Class Definition
The `HuggingfaceLLM` class is defined as follows:
```python
class HuggingfaceLLM:
    def __init__(
        self,
        model_id: str,
        device: str = None,
        max_length: int = 20,
        quantize: bool = False,
        quantization_config: dict = None,
        verbose=False,
        distributed=False,
        decoding=False,
    ):
        # Attributes and initialization logic explained below
        pass

    def load_model(self):
        # Method to load the pre-trained model and tokenizer
        pass

    def run(self, prompt_text: str, max_length: int = None):
        # Method to generate text-based responses
        pass

    def __call__(self, prompt_text: str, max_length: int = None):
        # Alternate method for generating text-based responses
        pass
```
### Attributes

| Attribute             | Description                                                                |
|-----------------------|----------------------------------------------------------------------------|
| `model_id`            | The ID of the pre-trained model to be used.                                |
| `device`              | The device on which the model runs (`'cuda'` for GPU or `'cpu'` for CPU).  |
| `max_length`          | The maximum length of the generated text.                                  |
| `quantize`            | A boolean indicating whether quantization should be used.                  |
| `quantization_config` | A dictionary with configuration options for quantization.                  |
| `verbose`             | A boolean indicating whether verbose logs should be printed.               |
| `logger`              | An optional logger for logging messages (defaults to a basic logger).      |
| `distributed`         | A boolean indicating whether distributed processing should be used.        |
| `decoding`            | A boolean indicating whether to perform decoding during text generation.   |

### Class Methods
#### `__init__` Method
The `__init__` method initializes an instance of the `HuggingfaceLLM` class with the specified parameters; the pre-trained model and tokenizer themselves are loaded afterwards via `load_model`.
- `model_id` (str): The ID of the pre-trained model to use.
- `device` (str, optional): The device to run the model on (`'cuda'` or `'cpu'`).
- `max_length` (int, optional): The maximum length of the generated text.
- `quantize` (bool, optional): Whether to use quantization.
- `quantization_config` (dict, optional): Configuration for quantization.
- `verbose` (bool, optional): Whether to print verbose logs.
- `logger` (logging.Logger, optional): The logger to use.
- `distributed` (bool, optional): Whether to use distributed processing.
- `decoding` (bool, optional): Whether to perform decoding during text generation.

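
For illustration, a hypothetical instantiation exercising the main parameters (the values here are arbitrary, not defaults):

```python
from swarms.models.huggingface import HuggingfaceLLM

# Illustrative values only; any Hugging Face causal LM ID works for model_id.
llm = HuggingfaceLLM(
    model_id="gpt2",
    device="cuda",  # or "cpu"
    max_length=64,
    verbose=True,
)
```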
#### `load_model` Method
The `load_model` method loads the pre-trained model and tokenizer specified by `model_id`.
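
The implementation is not reproduced here; the following is a minimal sketch of the conventional Transformers loading pattern, assuming the results are stored on `self.model` and `self.tokenizer` (as the accompanying tests suggest):

```python
# A minimal sketch, not the verbatim implementation. AutoTokenizer and
# AutoModelForCausalLM are the standard Transformers entry points for
# causal LM inference.
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(self):
    # Download (or read from cache) the tokenizer and weights for `model_id`.
    self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
    self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
    # Place the model on the configured device ('cuda' or 'cpu').
    self.model.to(self.device)
```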
#### `run` and `__call__` Methods
The `run` and `__call__` methods both generate a text response from a given prompt (a sketch of the typical flow follows the parameter list). They accept the following parameters:
- `prompt_text` (str): The text prompt to initiate text generation.
- `max_length` (int, optional): The maximum length of the generated text.
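
The concrete generation logic is implementation-specific; a sketch of the typical tokenize-generate-decode flow, assuming `self.model` and `self.tokenizer` were populated by `load_model`:

```python
# A sketch under the stated assumptions, not the verbatim implementation.
def run(self, prompt_text: str, max_length: int = None):
    # Encode the prompt and move the input tensors to the model's device.
    inputs = self.tokenizer(prompt_text, return_tensors="pt").to(self.device)
    # Generate up to `max_length` tokens, falling back to the instance default.
    output_ids = self.model.generate(
        **inputs, max_length=max_length or self.max_length
    )
    # Decode the generated token IDs back into text.
    return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
```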
### Usage Examples
Here are three ways to use the `HuggingfaceLLM` class:
#### Example 1: Basic Usage
```python
from swarms.models.huggingface import HuggingfaceLLM

# Initialize the HuggingfaceLLM instance with a model ID
model_id = "gpt2"
inference = HuggingfaceLLM(model_id=model_id)

# Generate text based on a prompt
prompt_text = "Once upon a time"
generated_text = inference(prompt_text)
print(generated_text)
```
#### Example 2: Custom Configuration
```python
from swarms.models.huggingface import HuggingfaceLLM

# Initialize with custom configuration
custom_config = {
    "quantize": True,
    "quantization_config": {"load_in_4bit": True},
    "verbose": True,
}
inference = HuggingfaceLLM(model_id="gpt2", **custom_config)

# Generate text based on a prompt
prompt_text = "Tell me a joke"
generated_text = inference(prompt_text)
print(generated_text)
```
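
Note that recent Transformers releases express these options through a `BitsAndBytesConfig` object rather than a plain dict. Assuming `HuggingfaceLLM` forwards `quantization_config` to `from_pretrained` unchanged, an equivalent setup would be:

```python
from transformers import BitsAndBytesConfig

from swarms.models.huggingface import HuggingfaceLLM

# Assumes quantization_config is passed through to from_pretrained as-is.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
inference = HuggingfaceLLM(
    model_id="gpt2",
    quantize=True,
    quantization_config=bnb_config,
)
```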
#### Example 3: Distributed Processing
```python
from swarms.models.huggingface import HuggingfaceLLM

# Initialize for distributed processing
inference = HuggingfaceLLM(model_id="gpt2-medium", distributed=True)

# Generate text based on a prompt
prompt_text = "Translate the following sentence to French"
generated_text = inference(prompt_text)
print(generated_text)
```
### Additional Information
- The `HuggingfaceLLM` class provides the flexibility to load and use pre-trained models from the Hugging Face Transformers library.
- Quantization can be enabled to reduce model size and inference time.
- Distributed processing can be used for parallelized inference.
- Verbose logging can help in debugging and understanding the text generation process.
### References
- [Hugging Face Transformers Documentation](https://huggingface.co/transformers/)
- [PyTorch Documentation](https://pytorch.org/docs/stable/index.html)
This documentation provides a comprehensive understanding of the `HuggingfaceLLM` class, its attributes, methods, and usage examples. Developers can use this class to perform text generation tasks efficiently using pre-trained models from the Hugging Face Transformers library.

### Tests

A pytest suite accompanies the class:

```python
import pytest
from unittest.mock import patch

from transformers import PreTrainedModel, PreTrainedTokenizerBase

from swarms.models.huggingface import HuggingfaceLLM


@pytest.fixture
def huggingface_llm():
    # Create an instance of HuggingfaceLLM for testing.
    model_id = "gpt2"
    return HuggingfaceLLM(model_id=model_id)


def test_initialization(huggingface_llm):
    # Test the initialization of the HuggingfaceLLM class.
    assert huggingface_llm.model_id == "gpt2"
    assert huggingface_llm.device in ["cpu", "cuda"]
    assert huggingface_llm.max_length == 20
    assert huggingface_llm.verbose is False
    assert huggingface_llm.distributed is False
    assert huggingface_llm.decoding is False
    assert huggingface_llm.model is None
    assert huggingface_llm.tokenizer is None


def test_load_model(huggingface_llm):
    # Test loading the model. isinstance checks against the Auto* factory
    # classes always fail, so the concrete base classes are used instead.
    huggingface_llm.load_model()
    assert isinstance(huggingface_llm.model, PreTrainedModel)
    assert isinstance(huggingface_llm.tokenizer, PreTrainedTokenizerBase)


def test_run(huggingface_llm):
    # Test the run method of HuggingfaceLLM.
    prompt_text = "Once upon a time"
    generated_text = huggingface_llm.run(prompt_text)
    assert isinstance(generated_text, str)
    assert len(generated_text) > 0


def test_call_method(huggingface_llm):
    # Test the __call__ method of HuggingfaceLLM.
    prompt_text = "Once upon a time"
    generated_text = huggingface_llm(prompt_text)
    assert isinstance(generated_text, str)
    assert len(generated_text) > 0


def test_load_model_failure():
    # Test that a failure in the underlying loader propagates. The patch
    # target assumes AutoModelForCausalLM is imported in the module under test.
    with patch(
        "swarms.models.huggingface.AutoModelForCausalLM.from_pretrained",
        side_effect=Exception("Model load failed"),
    ):
        with pytest.raises(Exception):
            huggingface_llm = HuggingfaceLLM(model_id="gpt2")
            huggingface_llm.load_model()
```