from swarms.models import HuggingfaceLLM
import torch

try:
    # Load the model without quantization; verbose logging aids debugging
    inference = HuggingfaceLLM(
        model_id="gpt2",
        quantize=False,
        verbose=True,
    )

    # Move the underlying model to the GPU when one is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inference.model.to(device)

    prompt_text = "Create a list of known biggest risks of structural collapse with references"
    inputs = inference.tokenizer(prompt_text, return_tensors="pt").to(device)

    generated_ids = inference.model.generate(
        **inputs,
        max_new_tokens=1000,  # Adjust the length of the generation
        temperature=0.7,      # Adjust creativity
        top_k=50,             # Limits the vocabulary considered at each step
        pad_token_id=inference.tokenizer.eos_token_id,
        do_sample=True,       # Enable sampling so temperature takes effect
    )

    generated_text = inference.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(generated_text)
except Exception as e:
    print(f"An error occurred: {e}")
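
# Note: the snippet above reaches into the wrapper's .model and .tokenizer
# attributes to drive generation manually. If your swarms version exposes a
# high-level run() method on HuggingfaceLLM (an assumption -- verify against
# the version you have installed), a minimal sketch of the simpler path is:
#
#     output = inference.run(prompt_text)
#     print(output)
#
# The manual route shown above remains useful when you need fine-grained
# control over generation parameters such as temperature, top_k, and
# max_new_tokens.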