### `HuggingfaceLLM`
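
The example below initializes `HuggingfaceLLM` with a custom quantization config, then generates text in two ways: by driving the underlying Hugging Face `model` and `tokenizer` directly, and by calling the wrapper itself with a prompt.
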
```python
from swarms.models import HuggingfaceLLM
import torch

# Initialize with a custom quantization configuration
custom_config = {
    "quantize": True,
    "quantization_config": {"load_in_4bit": True},
    "verbose": True,
}
inference = HuggingfaceLLM(
    model_id="NousResearch/Nous-Hermes-2-Vision-Alpha", **custom_config
)

try:
    # Re-initialize with a small model for a quick, unquantized run
    inference = HuggingfaceLLM(
        model_id="gpt2",
        quantize=False,
        verbose=True,
    )

    # Move the underlying Hugging Face model to the available device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inference.model.to(device)

    # Drive the tokenizer and model directly for fine-grained control
    prompt_text = "Create a list of known biggest risks of structural collapse with references"
    inputs = inference.tokenizer(prompt_text, return_tensors="pt").to(device)

    generated_ids = inference.model.generate(
        **inputs,
        max_new_tokens=1000,  # Adjust the length of the generation
        temperature=0.7,  # Adjust creativity
        top_k=50,  # Limits the vocabulary considered at each step
        pad_token_id=inference.tokenizer.eos_token_id,
        do_sample=True,  # Enable sampling to utilize temperature
    )

    generated_text = inference.tokenizer.decode(
        generated_ids[0], skip_special_tokens=True
    )
    print(generated_text)

except Exception as e:
    print(f"An error occurred: {e}")

# Generate text based on a prompt by calling the wrapper directly
prompt_text = (
    "Create a list of known biggest risks of structural collapse with references"
)
generated_text = inference(prompt_text)
print(generated_text)
```
### `Mixtral`