parent
dc7a0e3741
commit
924bf9bd7f
@ -0,0 +1,153 @@
|
|||||||
|
## `HuggingfaceLLM` Documentation
|
||||||
|
|
||||||
|
### Introduction
|
||||||
|
|
||||||
|
The `HuggingfaceLLM` class is designed for running inference using models from the Hugging Face Transformers library. This documentation provides an in-depth understanding of the class, its purpose, attributes, methods, and usage examples.
|
||||||
|
|
||||||
|
#### Purpose
|
||||||
|
|
||||||
|
The `HuggingfaceLLM` class serves the following purposes:
|
||||||
|
|
||||||
|
1. Load pre-trained Hugging Face models and tokenizers.
|
||||||
|
2. Generate text-based responses from the loaded model using a given prompt.
|
||||||
|
3. Provide flexibility in device selection, quantization, and other configuration options.
|
||||||
|
|
||||||
|
### Class Definition
|
||||||
|
|
||||||
|
The `HuggingfaceLLM` class is defined as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class HuggingfaceLLM:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
model_id: str,
|
||||||
|
device: str = None,
|
||||||
|
max_length: int = 20,
|
||||||
|
quantize: bool = False,
|
||||||
|
quantization_config: dict = None,
|
||||||
|
verbose=False,
|
||||||
|
distributed=False,
|
||||||
|
decoding=False,
|
||||||
|
):
|
||||||
|
# Attributes and initialization logic explained below
|
||||||
|
pass
|
||||||
|
|
||||||
|
def load_model(self):
|
||||||
|
# Method to load the pre-trained model and tokenizer
|
||||||
|
pass
|
||||||
|
|
||||||
|
def run(self, prompt_text: str, max_length: int = None):
|
||||||
|
# Method to generate text-based responses
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __call__(self, prompt_text: str, max_length: int = None):
|
||||||
|
# Alternate method for generating text-based responses
|
||||||
|
pass
|
||||||
|
```
|
||||||
|
|
||||||
|
### Attributes
|
||||||
|
|
||||||
|
| Attribute | Description |
|
||||||
|
|----------------------|---------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| `model_id` | The ID of the pre-trained model to be used. |
|
||||||
|
| `device` | The device on which the model runs (`'cuda'` for GPU or `'cpu'` for CPU). |
|
||||||
|
| `max_length` | The maximum length of the generated text. |
|
||||||
|
| `quantize` | A boolean indicating whether quantization should be used. |
|
||||||
|
| `quantization_config`| A dictionary with configuration options for quantization. |
|
||||||
|
| `verbose` | A boolean indicating whether verbose logs should be printed. |
|
||||||
|
| `logger` | An optional logger for logging messages (defaults to a basic logger). |
|
||||||
|
| `distributed` | A boolean indicating whether distributed processing should be used. |
|
||||||
|
| `decoding` | A boolean indicating whether to perform decoding during text generation. |
|
||||||
|
|
||||||
|
### Class Methods
|
||||||
|
|
||||||
|
#### `__init__` Method
|
||||||
|
|
||||||
|
The `__init__` method initializes an instance of the `HuggingfaceLLM` class with the specified parameters. It also loads the pre-trained model and tokenizer.
|
||||||
|
|
||||||
|
- `model_id` (str): The ID of the pre-trained model to use.
|
||||||
|
- `device` (str, optional): The device to run the model on ('cuda' or 'cpu').
|
||||||
|
- `max_length` (int, optional): The maximum length of the generated text.
|
||||||
|
- `quantize` (bool, optional): Whether to use quantization.
|
||||||
|
- `quantization_config` (dict, optional): Configuration for quantization.
|
||||||
|
- `verbose` (bool, optional): Whether to print verbose logs.
|
||||||
|
- `logger` (logging.Logger, optional): The logger to use.
|
||||||
|
- `distributed` (bool, optional): Whether to use distributed processing.
|
||||||
|
- `decoding` (bool, optional): Whether to perform decoding during text generation.
|
||||||
|
|
||||||
|
#### `load_model` Method
|
||||||
|
|
||||||
|
The `load_model` method loads the pre-trained model and tokenizer specified by `model_id`.
|
||||||
|
|
||||||
|
#### `run` and `__call__` Methods
|
||||||
|
|
||||||
|
Both `run` and `__call__` methods generate text-based responses based on a given prompt. They accept the following parameters:
|
||||||
|
|
||||||
|
- `prompt_text` (str): The text prompt to initiate text generation.
|
||||||
|
- `max_length` (int, optional): The maximum length of the generated text.
|
||||||
|
|
||||||
|
### Usage Examples
|
||||||
|
|
||||||
|
Here are three ways to use the `HuggingfaceLLM` class:
|
||||||
|
|
||||||
|
#### Example 1: Basic Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
from your_module import HuggingfaceLLM
|
||||||
|
|
||||||
|
# Initialize the HuggingfaceLLM instance with a model ID
|
||||||
|
model_id = "gpt2-small"
|
||||||
|
inference = HuggingfaceLLM(model_id=model_id)
|
||||||
|
|
||||||
|
# Generate text based on a prompt
|
||||||
|
prompt_text = "Once upon a time"
|
||||||
|
generated_text = inference(prompt_text)
|
||||||
|
print(generated_text)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Example 2: Custom Configuration
|
||||||
|
|
||||||
|
```python
|
||||||
|
from your_module import HuggingfaceLLM
|
||||||
|
|
||||||
|
# Initialize with custom configuration
|
||||||
|
custom_config = {
|
||||||
|
"quantize": True,
|
||||||
|
"quantization_config": {"load_in_4bit": True},
|
||||||
|
"verbose": True
|
||||||
|
}
|
||||||
|
inference = HuggingfaceLLM(model_id="gpt2-small", **custom_config)
|
||||||
|
|
||||||
|
# Generate text based on a prompt
|
||||||
|
prompt_text = "Tell me a joke"
|
||||||
|
generated_text = inference(prompt_text)
|
||||||
|
print(generated_text)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Example 3: Distributed Processing
|
||||||
|
|
||||||
|
```python
|
||||||
|
from your_module import HuggingfaceLLM
|
||||||
|
|
||||||
|
# Initialize for distributed processing
|
||||||
|
inference = HuggingfaceLLM(model_id="gpt2-medium", distributed=True)
|
||||||
|
|
||||||
|
# Generate text based on a prompt
|
||||||
|
prompt_text = "Translate the following sentence to French"
|
||||||
|
generated_text = inference(prompt_text)
|
||||||
|
print(generated_text)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Additional Information
|
||||||
|
|
||||||
|
- The `HuggingfaceLLM` class provides the flexibility to load and use pre-trained models from the Hugging Face Transformers library.
|
||||||
|
- Quantization can be enabled to reduce model size and inference time.
|
||||||
|
- Distributed processing can be used for parallelized inference.
|
||||||
|
- Verbose logging can help in debugging and understanding the text generation process.
|
||||||
|
|
||||||
|
### References
|
||||||
|
|
||||||
|
- [Hugging Face Transformers Documentation](https://huggingface.co/transformers/)
|
||||||
|
- [PyTorch Documentation](https://pytorch.org/docs/stable/index.html)
|
||||||
|
|
||||||
|
This documentation provides a comprehensive understanding of the `HuggingfaceLLM` class, its attributes, methods, and usage examples. Developers can use this class to perform text generation tasks efficiently using pre-trained models from the Hugging Face Transformers library.
|
@ -0,0 +1,58 @@
|
|||||||
|
import pytest
|
||||||
|
import torch
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
||||||
|
from swarms.models.huggingface import HuggingfaceLLM
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def huggingface_llm():
|
||||||
|
# Create an instance of HuggingfaceLLM for testing.
|
||||||
|
model_id = "gpt2-small"
|
||||||
|
return HuggingfaceLLM(model_id=model_id)
|
||||||
|
|
||||||
|
|
||||||
|
def test_initialization(huggingface_llm):
|
||||||
|
# Test the initialization of the HuggingfaceLLM class.
|
||||||
|
assert huggingface_llm.model_id == "gpt2-small"
|
||||||
|
assert huggingface_llm.device in ["cpu", "cuda"]
|
||||||
|
assert huggingface_llm.max_length == 20
|
||||||
|
assert huggingface_llm.verbose == False
|
||||||
|
assert huggingface_llm.distributed == False
|
||||||
|
assert huggingface_llm.decoding == False
|
||||||
|
assert huggingface_llm.model is None
|
||||||
|
assert huggingface_llm.tokenizer is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_model(huggingface_llm):
|
||||||
|
# Test loading the model.
|
||||||
|
huggingface_llm.load_model()
|
||||||
|
assert isinstance(huggingface_llm.model, AutoModelForCausalLM)
|
||||||
|
assert isinstance(huggingface_llm.tokenizer, AutoTokenizer)
|
||||||
|
|
||||||
|
|
||||||
|
def test_run(huggingface_llm):
|
||||||
|
# Test the run method of HuggingfaceLLM.
|
||||||
|
prompt_text = "Once upon a time"
|
||||||
|
generated_text = huggingface_llm.run(prompt_text)
|
||||||
|
assert isinstance(generated_text, str)
|
||||||
|
assert len(generated_text) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_call_method(huggingface_llm):
|
||||||
|
# Test the __call__ method of HuggingfaceLLM.
|
||||||
|
prompt_text = "Once upon a time"
|
||||||
|
generated_text = huggingface_llm(prompt_text)
|
||||||
|
assert isinstance(generated_text, str)
|
||||||
|
assert len(generated_text) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_model_failure():
|
||||||
|
# Test loading model failure.
|
||||||
|
with patch(
|
||||||
|
"your_module.AutoModelForCausalLM.from_pretrained",
|
||||||
|
side_effect=Exception("Model load failed"),
|
||||||
|
):
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
huggingface_llm = HuggingfaceLLM(model_id="gpt2-small")
|
||||||
|
huggingface_llm.load_model()
|
Loading…
Reference in new issue