diff --git a/swarms/models/vllm.py b/swarms/models/vllm.py
index 0ea4be4a..58745a75 100644
--- a/swarms/models/vllm.py
+++ b/swarms/models/vllm.py
@@ -1,12 +1,19 @@
+import torch
 from swarms.models.base_llm import AbstractLLM
+import subprocess
 
-try:
-    from vllm import LLM, SamplingParams
-except ImportError as error:
-    print(f"[ERROR] [vLLM] {error}")
-    # subprocess.run(["pip", "install", "vllm"])
-    # raise error
-    raise error
+if torch.cuda.is_available():
+    try:
+        from vllm import LLM, SamplingParams
+    except ImportError:
+        # Install vLLM on demand, then retry the import.
+        subprocess.run(["pip", "install", "vllm"], check=True)
+        from vllm import LLM, SamplingParams
+else:
+    # vLLM requires CUDA; fall back to the Hugging Face backend on CPU.
+    from swarms.models.huggingface import HuggingfaceLLM as LLM
+
+    SamplingParams = None
 
 
 class vLLM(AbstractLLM):
@@ -83,8 +90,9 @@ class vLLM(AbstractLLM):
             _type_: _description_
         """
         try:
-            outputs = self.llm.generate(task, self.sampling_params)
-            return outputs
+            return self.llm.generate(
+                task, self.sampling_params, *args, **kwargs
+            )
         except Exception as error:
             print(f"[ERROR] [vLLM] [run] {error}")
             raise error
diff --git a/swarms/utils/device_checker_cuda.py b/swarms/utils/device_checker_cuda.py
new file mode 100644
index 00000000..b178ef1e
--- /dev/null
+++ b/swarms/utils/device_checker_cuda.py
@@ -0,0 +1,98 @@
+import torch
+import logging
+import warnings
+from typing import Union, List, Any
+
+
+def check_device(
+    log_level: Any = logging.INFO,
+    memory_threshold: float = 0.8,
+    capability_threshold: float = 3.5,
+    return_type: str = "list",
+) -> Union[torch.device, List[torch.device]]:
+    """
+    Checks for the availability of CUDA and returns the appropriate device(s).
+
+    If CUDA is not available, returns a CPU device. If CUDA is available,
+    returns a list of all available GPU devices, or a single device when
+    return_type="single". Emits a UserWarning when a device's memory usage
+    exceeds memory_threshold (as a fraction of total memory) or when its
+    compute capability is below capability_threshold.
+    """
+    logging.basicConfig(level=log_level)
+
+    # Check for CUDA availability
+    try:
+        if not torch.cuda.is_available():
+            logging.info("CUDA is not available. Using CPU...")
+            return torch.device("cpu")
+    except Exception as e:
+        logging.error("Error checking for CUDA availability: %s", e)
+        return torch.device("cpu")
+
+    logging.info("CUDA is available.")
+
+    # Check for multiple GPUs
+    num_gpus = torch.cuda.device_count()
+    if num_gpus > 1:
+        logging.info(f"Multiple GPUs available: {num_gpus}")
+        devices = [torch.device(f"cuda:{i}") for i in range(num_gpus)]
+    else:
+        logging.info("Only one GPU is available.")
+        devices = [torch.device("cuda")]
+
+    # Check additional properties for each device
+    for device in devices:
+        try:
+            torch.cuda.set_device(device)
+            capability = torch.cuda.get_device_capability(device)
+            total_memory = torch.cuda.get_device_properties(
+                device
+            ).total_memory
+            allocated_memory = torch.cuda.memory_allocated(device)
+            reserved_memory = torch.cuda.memory_reserved(device)
+            device_name = torch.cuda.get_device_name(device)
+
+            logging.info(
+                f"Device: {device}, Name: {device_name}, Compute"
+                f" Capability: {capability}, Total Memory:"
+                f" {total_memory}, Allocated Memory:"
+                f" {allocated_memory}, Reserved Memory:"
+                f" {reserved_memory}"
+            )
+
+            # Warn if memory usage exceeds the threshold
+            if (
+                total_memory > 0
+                and allocated_memory / total_memory
+                > memory_threshold
+            ):
+                warnings.warn(
+                    f"Memory usage for device {device} exceeds"
+                    f" threshold ({memory_threshold})"
+                )
+
+            # Warn if compute capability is below the threshold
+            major, minor = capability
+            if major + minor / 10 < capability_threshold:
+                warnings.warn(
+                    f"Compute capability for device {device} is"
+                    f" below threshold ({capability_threshold})"
+                )
+        except Exception as e:
+            logging.error(
+                "Error retrieving properties for device %s: %s",
+                device,
+                e,
+            )
+
+    if return_type == "single":
+        # Return the first detected GPU as a single torch.device
+        return devices[0]
+
+    return devices
+
+
+if __name__ == "__main__":
+    devices = check_device()
+    logging.info(f"Using device(s): {devices}")
diff --git a/tests/utils/test_device.py b/tests/utils/test_device.py
new file mode 100644
index 00000000..14399de9
--- /dev/null
+++ b/tests/utils/test_device.py
@@ -0,0 +1,114 @@
+import torch
+from unittest.mock import MagicMock
+import pytest
+from swarms.utils.device_checker_cuda import check_device
+
+
+def test_cuda_not_available(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=False)
+    device = check_device()
+    assert str(device) == "cpu"
+
+
+def test_single_gpu_available(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    devices = check_device()
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_multiple_gpus_available(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=2)
+    devices = check_device()
+    assert len(devices) == 2
+    assert str(devices[0]) == "cuda:0"
+    assert str(devices[1]) == "cuda:1"
+
+
+def test_device_properties(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    mocker.patch("torch.cuda.set_device")  # avoid touching real GPUs
+    mocker.patch(
+        "torch.cuda.get_device_capability", return_value=(7, 5)
+    )
+    mocker.patch(
+        "torch.cuda.get_device_properties",
+        return_value=MagicMock(total_memory=1000),
+    )
+    mocker.patch("torch.cuda.memory_allocated", return_value=200)
+    mocker.patch("torch.cuda.memory_reserved", return_value=300)
+    mocker.patch(
+        "torch.cuda.get_device_name", return_value="Tesla K80"
+    )
+    devices = check_device()
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_memory_threshold(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    mocker.patch("torch.cuda.set_device")  # avoid touching real GPUs
+    mocker.patch(
+        "torch.cuda.get_device_capability", return_value=(7, 5)
+    )
+    mocker.patch(
+        "torch.cuda.get_device_properties",
+        return_value=MagicMock(total_memory=1000),
+    )
+    mocker.patch(
+        "torch.cuda.memory_allocated", return_value=900
+    )  # 90% of total memory
+    mocker.patch("torch.cuda.memory_reserved", return_value=300)
+    mocker.patch(
+        "torch.cuda.get_device_name", return_value="Tesla K80"
+    )
+    with pytest.warns(
+        UserWarning,
+        match=r"Memory usage for device cuda exceeds threshold",
+    ):
+        devices = check_device(
+            memory_threshold=0.8
+        )  # Set memory threshold to 80%
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_compute_capability_threshold(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=1)
+    mocker.patch("torch.cuda.set_device")  # avoid touching real GPUs
+    mocker.patch(
+        "torch.cuda.get_device_capability", return_value=(3, 0)
+    )  # Compute capability 3.0
+    mocker.patch(
+        "torch.cuda.get_device_properties",
+        return_value=MagicMock(total_memory=1000),
+    )
+    mocker.patch("torch.cuda.memory_allocated", return_value=200)
+    mocker.patch("torch.cuda.memory_reserved", return_value=300)
+    mocker.patch(
+        "torch.cuda.get_device_name", return_value="Tesla K80"
+    )
+    with pytest.warns(
+        UserWarning,
+        match=(
+            r"Compute capability for device cuda is below threshold"
+        ),
+    ):
+        devices = check_device(
+            capability_threshold=3.5
+        )  # Set compute capability threshold to 3.5
+    assert len(devices) == 1
+    assert str(devices[0]) == "cuda"
+
+
+def test_return_single_device(mocker):
+    mocker.patch("torch.cuda.is_available", return_value=True)
+    mocker.patch("torch.cuda.device_count", return_value=2)
+    device = check_device(return_type="single")
+    assert isinstance(device, torch.device)
+    assert str(device) == "cuda:0"
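
For reference, a minimal usage sketch of the new `check_device` utility as patched above (a sketch only; actual output depends on the host's GPUs):

```python
import logging

from swarms.utils.device_checker_cuda import check_device

# Returns torch.device("cpu") on CPU-only hosts, otherwise a list of
# CUDA devices; a UserWarning fires when the memory or compute-capability
# thresholds are crossed.
devices = check_device(
    log_level=logging.INFO,
    memory_threshold=0.8,
    capability_threshold=3.5,
)

# Ask for a single torch.device instead of a list.
device = check_device(return_type="single")
```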