|
|
|
@ -386,6 +386,23 @@ generated_text = inference(prompt_text)
|
|
|
|
|
print(generated_text)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
### Mixtral
|
|
|
|
|
- Utilize Mixtral through a very simple API
|
|
|
|
|
- Utilize 4-bit quantization for increased speed and reduced memory usage
|
|
|
|
|
- Use Flash Attention 2.0 for increased speed and reduced memory usage
|
|
|
|
|
```python
|
|
|
|
|
from swarms.models import Mixtral

# Load Mixtral with 4-bit quantization and Flash Attention 2.0 enabled
# (both reduce memory usage and speed up inference).
model = Mixtral(load_in_4bit=True, use_flash_attention_2=True)

# Run a simple text-generation task.
story = model.run("Generate a creative story.")

# Display the model's output.
print(story)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
# Features 🤖
|
|
|
|
|