# swarms/playground/examples/example_huggingfacellm.py

import torch

from swarms.models import HuggingfaceLLM

# Example: sample a text continuation from GPT-2 through the HuggingfaceLLM
# wrapper. Any exception raised along the way (construction, device transfer,
# generation, decoding) is caught at the bottom and reported as a one-line
# message instead of a traceback.
try:
    inference = HuggingfaceLLM(model_id="gpt2", quantize=False, verbose=True)

    # Use the GPU when torch reports one; otherwise stay on the CPU.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    inference.model.to(device)

    prompt_text = (
        "Create a list of known biggest risks of structural collapse"
        " with references"
    )

    # Tokenize into PyTorch tensors, then move them to the chosen device.
    encoded_prompt = inference.tokenizer(prompt_text, return_tensors="pt")
    inputs = encoded_prompt.to(device)

    # Generation settings, kept together so they are easy to tweak.
    sampling_options = {
        "max_new_tokens": 1000,  # Adjust the length of the generation
        "temperature": 0.7,  # Adjust creativity
        "top_k": 50,  # Limits the vocabulary considered at each step
        "pad_token_id": inference.tokenizer.eos_token_id,
        "do_sample": True,  # Enable sampling to utilize temperature
    }
    generated_ids = inference.model.generate(**inputs, **sampling_options)

    # Only one prompt was submitted, so decode the first returned sequence.
    first_sequence = generated_ids[0]
    generated_text = inference.tokenizer.decode(
        first_sequence,
        skip_special_tokens=True,
    )
    print(generated_text)
except Exception as exc:
    print(f"An error occurred: {exc}")