from swarms.models import HuggingfaceLLM
import torch

try:
    # Load the model without quantization; verbose logging aids debugging
    inference = HuggingfaceLLM(
        model_id="gpt2",
        quantize=False,
        verbose=True,
    )

    # Move the underlying model to the GPU when one is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inference.model.to(device)

    prompt_text = "Create a list of known biggest risks of structural collapse with references"
    inputs = inference.tokenizer(prompt_text, return_tensors="pt").to(device)

    generated_ids = inference.model.generate(
        **inputs,
        max_new_tokens=1000,  # Adjust the length of the generation
        temperature=0.7,      # Adjust creativity
        top_k=50,             # Limits the vocabulary considered at each step
        pad_token_id=inference.tokenizer.eos_token_id,
        do_sample=True,       # Enable sampling so temperature takes effect
    )

    generated_text = inference.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(generated_text)
except Exception as e:
    print(f"An error occurred: {e}")
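
# Note: the snippet above reaches into the wrapper's .model and .tokenizer
# attributes to drive generation manually. If your swarms version exposes a
# high-level run() method on HuggingfaceLLM (an assumption -- verify against
# the version you have installed), a minimal sketch of the simpler path is:
#
#     output = inference.run(prompt_text)
#     print(output)
#
# The manual route shown above remains useful when you need fine-grained
# control over generation parameters such as temperature, top_k, and
# max_new_tokens.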