From 10028916ee26858764ec6d3012002a6a136ff051 Mon Sep 17 00:00:00 2001
From: vyomakesh09
Date: Sun, 25 Feb 2024 07:49:47 +0000
Subject: [PATCH] fixed hugging face llm

---
 examples/example_huggingfacellm.py | 41 +++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/examples/example_huggingfacellm.py b/examples/example_huggingfacellm.py
index 64c08621..ca28df58 100644
--- a/examples/example_huggingfacellm.py
+++ b/examples/example_huggingfacellm.py
@@ -1,18 +1,29 @@
 from swarms.models import HuggingfaceLLM
+import torch
 
-# Initialize with custom configuration
-custom_config = {
-    "quantize": True,
-    "quantization_config": {"load_in_4bit": True},
-    "verbose": True,
-}
-inference = HuggingfaceLLM(
-    model_id="NousResearch/Nous-Hermes-2-Vision-Alpha", **custom_config
-)
+try:
+    inference = HuggingfaceLLM(
+        model_id="gpt2",
+        quantize=False,
+        verbose=True,
+    )
 
-# Generate text based on a prompt
-prompt_text = (
-    "Create a list of known biggest risks of structural collapse with references"
-)
-generated_text = inference(prompt_text)
-print(generated_text)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    inference.model.to(device)
+
+    prompt_text = "Create a list of known biggest risks of structural collapse with references"
+    inputs = inference.tokenizer(prompt_text, return_tensors="pt").to(device)
+
+    generated_ids = inference.model.generate(
+        **inputs,
+        max_new_tokens=1000,  # Adjust the length of the generation
+        temperature=0.7,  # Adjust creativity
+        top_k=50,  # Limits the vocabulary considered at each step
+        pad_token_id=inference.tokenizer.eos_token_id,
+        do_sample=True  # Enable sampling to utilize temperature
+    )
+
+    generated_text = inference.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+    print(generated_text)
+except Exception as e:
+    print(f"An error occurred: {e}")
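
Note (not part of the commit): the patched example drives generation through the Hugging Face objects that HuggingfaceLLM is assumed to expose as .model and .tokenizer. For comparison only, the sketch below shows the same sampling-based generation written directly against the transformers library; the "gpt2" model id and the generation settings mirror the patch, while the file name and everything else are illustrative assumptions rather than part of this change.

# sketch_generate_gpt2.py - standalone approximation of the patched example,
# assuming only `transformers` and `torch` are installed (no swarms wrapper).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

prompt_text = "Create a list of known biggest risks of structural collapse with references"
inputs = tokenizer(prompt_text, return_tensors="pt").to(device)

generated_ids = model.generate(
    **inputs,
    max_new_tokens=1000,                  # same generation budget as the patch
    temperature=0.7,                      # only takes effect because do_sample=True
    top_k=50,                             # sample from the 50 most likely tokens per step
    do_sample=True,                       # sampling instead of greedy decoding
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; reuse EOS
)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

The comparison also makes the design choice visible: setting do_sample=True is what allows temperature and top_k to influence the output; without it, generate() falls back to greedy decoding and those parameters are ignored.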