From 10028916ee26858764ec6d3012002a6a136ff051 Mon Sep 17 00:00:00 2001
From: vyomakesh09
Date: Sun, 25 Feb 2024 07:49:47 +0000
Subject: [PATCH] fixed hugging face llm

---
 examples/example_huggingfacellm.py | 41 +++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/examples/example_huggingfacellm.py b/examples/example_huggingfacellm.py
index 64c08621..ca28df58 100644
--- a/examples/example_huggingfacellm.py
+++ b/examples/example_huggingfacellm.py
@@ -1,18 +1,29 @@
 from swarms.models import HuggingfaceLLM
+import torch
 
-# Initialize with custom configuration
-custom_config = {
-    "quantize": True,
-    "quantization_config": {"load_in_4bit": True},
-    "verbose": True,
-}
-inference = HuggingfaceLLM(
-    model_id="NousResearch/Nous-Hermes-2-Vision-Alpha", **custom_config
-)
+try:
+    inference = HuggingfaceLLM(
+        model_id="gpt2",
+        quantize=False,
+        verbose=True,
+    )
 
-# Generate text based on a prompt
-prompt_text = (
-    "Create a list of known biggest risks of structural collapse with references"
-)
-generated_text = inference(prompt_text)
-print(generated_text)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    inference.model.to(device)
+
+    prompt_text = "Create a list of known biggest risks of structural collapse with references"
+    inputs = inference.tokenizer(prompt_text, return_tensors="pt").to(device)
+
+    generated_ids = inference.model.generate(
+        **inputs,
+        max_new_tokens=1000,  # Adjust the length of the generation
+        temperature=0.7,  # Adjust creativity
+        top_k=50,  # Limits the vocabulary considered at each step
+        pad_token_id=inference.tokenizer.eos_token_id,
+        do_sample=True  # Enable sampling to utilize temperature
+    )
+
+    generated_text = inference.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+    print(generated_text)
+except Exception as e:
+    print(f"An error occurred: {e}")
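
Note (not part of the commit): the patched example drives generation through the Hugging Face objects that HuggingfaceLLM is assumed to expose as .model and .tokenizer. For comparison only, the sketch below shows the same sampling-based generation written directly against the transformers library; the "gpt2" model id and the generation settings mirror the patch, while the file name and everything else are illustrative assumptions rather than part of this change.

# sketch_generate_gpt2.py - standalone approximation of the patched example,
# assuming only `transformers` and `torch` are installed (no swarms wrapper).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

prompt_text = "Create a list of known biggest risks of structural collapse with references"
inputs = tokenizer(prompt_text, return_tensors="pt").to(device)

generated_ids = model.generate(
    **inputs,
    max_new_tokens=1000,                  # same generation budget as the patch
    temperature=0.7,                      # only takes effect because do_sample=True
    top_k=50,                             # sample from the 50 most likely tokens per step
    do_sample=True,                       # sampling instead of greedy decoding
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; reuse EOS
)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

The comparison also makes the design choice visible: setting do_sample=True is what allows temperature and top_k to influence the output; without it, generate() falls back to greedy decoding and those parameters are ignored.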