diff --git a/swarms/models/vllm.py b/swarms/models/vllm.py
index 0ea4be4a..58745a75 100644
--- a/swarms/models/vllm.py
+++ b/swarms/models/vllm.py
@@ -1,12 +1,19 @@
+import torch
 from swarms.models.base_llm import AbstractLLM
+import subprocess
 
-try:
-    from vllm import LLM, SamplingParams
-except ImportError as error:
-    print(f"[ERROR] [vLLM] {error}")
-    # subprocess.run(["pip", "install", "vllm"])
-    # raise error
-    raise error
+if torch.cuda.is_available():
+    try:
+        from vllm import LLM, SamplingParams
+    except ImportError:
+        # Install vLLM on demand, then retry the import.
+        subprocess.run(["pip", "install", "vllm"], check=True)
+        from vllm import LLM, SamplingParams
+else:
+    # vLLM requires CUDA; fall back to the Hugging Face backend on CPU.
+    from swarms.models.huggingface import HuggingfaceLLM as LLM
+
+    SamplingParams = None
 
 
 class vLLM(AbstractLLM):
@@ -83,8 +90,9 @@ class vLLM(AbstractLLM):
             _type_: _description_
         """
         try:
-            outputs = self.llm.generate(task, self.sampling_params)
-            return outputs
+            return self.llm.generate(
+                task, self.sampling_params, *args, **kwargs
+            )
         except Exception as error:
             print(f"[ERROR] [vLLM] [run] {error}")
             raise error
diff --git a/swarms/utils/device_checker_cuda.py b/swarms/utils/device_checker_cuda.py
new file mode 100644
index 00000000..b178ef1e
--- /dev/null
+++ b/swarms/utils/device_checker_cuda.py
@@ -0,0 +1,98 @@
+import torch
+import logging
+import warnings
+from typing import Union, List, Any
+
+
+def check_device(
+    log_level: Any = logging.INFO,
+    memory_threshold: float = 0.8,
+    capability_threshold: float = 3.5,
+    return_type: str = "list",
+) -> Union[torch.device, List[torch.device]]:
+    """
+    Checks for the availability of CUDA and returns the appropriate device(s).
+
+    If CUDA is not available, returns a CPU device. If CUDA is available,
+    returns a list of all available GPU devices, or a single device when
+    return_type="single". Emits a UserWarning when a device's memory usage
+    exceeds memory_threshold (as a fraction of total memory) or when its
+    compute capability is below capability_threshold.
+    """
+    logging.basicConfig(level=log_level)
+
+    # Check for CUDA availability
+    try:
+        if not torch.cuda.is_available():
+            logging.info("CUDA is not available. Using CPU...")
+            return torch.device("cpu")
+    except Exception as e:
+        logging.error("Error checking for CUDA availability: %s", e)
+        return torch.device("cpu")
+
+    logging.info("CUDA is available.")
+
+    # Check for multiple GPUs
+    num_gpus = torch.cuda.device_count()
+    if num_gpus > 1:
+        logging.info(f"Multiple GPUs available: {num_gpus}")
+        devices = [torch.device(f"cuda:{i}") for i in range(num_gpus)]
+    else:
+        logging.info("Only one GPU is available.")
+        devices = [torch.device("cuda")]
+
+    # Check additional properties for each device
+    for device in devices:
+        try:
+            torch.cuda.set_device(device)
+            capability = torch.cuda.get_device_capability(device)
+            total_memory = torch.cuda.get_device_properties(
+                device
+            ).total_memory
+            allocated_memory = torch.cuda.memory_allocated(device)
+            reserved_memory = torch.cuda.memory_reserved(device)
+            device_name = torch.cuda.get_device_name(device)
+
+            logging.info(
+                f"Device: {device}, Name: {device_name}, Compute"
+                f" Capability: {capability}, Total Memory:"
+                f" {total_memory}, Allocated Memory:"
+                f" {allocated_memory}, Reserved Memory:"
+                f" {reserved_memory}"
+            )
+
+            # Warn if memory usage exceeds the threshold
+            if (
+                total_memory > 0
+                and allocated_memory / total_memory
+                > memory_threshold
+            ):
+                warnings.warn(
+                    f"Memory usage for device {device} exceeds"
+                    f" threshold ({memory_threshold})"
+                )
+
+            # Warn if compute capability is below the threshold
+            major, minor = capability
+            if major + minor / 10 < capability_threshold:
+                warnings.warn(
+                    f"Compute capability for device {device} is"
+                    f" below threshold ({capability_threshold})"
+                )
+        except Exception as e:
+            logging.error(
+                "Error retrieving properties for device %s: %s",
+                device,
+                e,
+            )
+
+    if return_type == "single":
+        # Return the first detected GPU as a single torch.device
+        return devices[0]
+
+    return devices
+
+
+if __name__ == "__main__":
+    devices = check_device()
+    logging.info(f"Using device(s): {devices}")
diff --git a/tests/utils/test_device.py b/tests/utils/test_device.py
new file mode 100644
index 00000000..14399de9
--- /dev/null
+++ b/tests/utils/test_device.py
@@ -0,0 +1,114 @@
+import torch
+from unittest.mock import MagicMock
+import pytest
+from swarms.utils.device_checker_cuda import check_device
+
+
+def test_cuda_not_available(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=False)
+    device = check_device()
+    assert str(device) == "cpu"
+
+
+def test_single_gpu_available(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    devices = check_device()
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_multiple_gpus_available(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=2)
+    devices = check_device()
+    assert len(devices) == 2
+    assert str(devices[0]) == "cuda:0"
+    assert str(devices[1]) == "cuda:1"
+
+
+def test_device_properties(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    mocker.patch("torch.cuda.set_device")  # avoid touching real GPUs
+    mocker.patch(
+        "torch.cuda.get_device_capability", return_value=(7, 5)
+    )
+    mocker.patch(
+        "torch.cuda.get_device_properties",
+        return_value=MagicMock(total_memory=1000),
+    )
+    mocker.patch("torch.cuda.memory_allocated", return_value=200)
+    mocker.patch("torch.cuda.memory_reserved", return_value=300)
+    mocker.patch(
+        "torch.cuda.get_device_name", return_value="Tesla K80"
+    )
+    devices = check_device()
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_memory_threshold(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    mocker.patch("torch.cuda.set_device")  # avoid touching real GPUs
+    mocker.patch(
+        "torch.cuda.get_device_capability", return_value=(7, 5)
+    )
+    mocker.patch(
+        "torch.cuda.get_device_properties",
+        return_value=MagicMock(total_memory=1000),
+    )
+    mocker.patch(
+        "torch.cuda.memory_allocated", return_value=900
+    )  # 90% of total memory
+    mocker.patch("torch.cuda.memory_reserved", return_value=300)
+    mocker.patch(
+        "torch.cuda.get_device_name", return_value="Tesla K80"
+    )
+    with pytest.warns(
+        UserWarning,
+        match=r"Memory usage for device cuda exceeds threshold",
+    ):
+        devices = check_device(
+            memory_threshold=0.8
+        )  # Set memory threshold to 80%
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_compute_capability_threshold(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    mocker.patch("torch.cuda.set_device")  # avoid touching real GPUs
+    mocker.patch(
+        "torch.cuda.get_device_capability", return_value=(3, 0)
+    )  # Compute capability 3.0
+    mocker.patch(
+        "torch.cuda.get_device_properties",
+        return_value=MagicMock(total_memory=1000),
+    )
+    mocker.patch("torch.cuda.memory_allocated", return_value=200)
+    mocker.patch("torch.cuda.memory_reserved", return_value=300)
+    mocker.patch(
+        "torch.cuda.get_device_name", return_value="Tesla K80"
+    )
+    with pytest.warns(
+        UserWarning,
+        match=(
+            r"Compute capability for device cuda is below threshold"
+        ),
+    ):
+        devices = check_device(
+            capability_threshold=3.5
+        )  # Set compute capability threshold to 3.5
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_return_single_device(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=2)
+    device = check_device(return_type="single")
+    assert isinstance(device, torch.device)
+    assert str(device) == "cuda:0"
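
For reference, a minimal usage sketch of the new `check_device` utility as patched above (a sketch only; actual output depends on the host's GPUs):

```python
import logging

from swarms.utils.device_checker_cuda import check_device

# Returns torch.device("cpu") on CPU-only hosts, otherwise a list of
# CUDA devices; a UserWarning fires when the memory or compute-capability
# thresholds are crossed.
devices = check_device(
    log_level=logging.INFO,
    memory_threshold=0.8,
    capability_threshold=3.5,
)

# Ask for a single torch.device instead of a list.
device = check_device(return_type="single")
```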