From 46bc36629fe65e411e36eaaf5477be908ee8c43b Mon Sep 17 00:00:00 2001 From: Kye Date: Thu, 9 Nov 2023 19:14:29 -0500 Subject: [PATCH] Dockerfule Former-commit-id: 4596ddc5ff3c7756c95385d030b967d61e4eb536 --- Dockerfile | 45 +++--- docs/docker_setup.md | 197 +++++++++++++++++++++++++++ mkdocs.yml | 1 + pyproject.toml | 1 - requirements.txt | 1 - {demos => swarms/models}/autotemp.py | 4 +- tests/Dockerfile | 33 +++++ tests/models/auto_temp.py | 76 +++++++++++ tests/models/distill_whisper.py | 154 +++++++++++++++++++++ tests/models/jina_embeds.py | 82 +++++++++++ 10 files changed, 572 insertions(+), 22 deletions(-) create mode 100644 docs/docker_setup.md rename {demos => swarms/models}/autotemp.py (98%) create mode 100644 tests/Dockerfile create mode 100644 tests/models/auto_temp.py create mode 100644 tests/models/distill_whisper.py create mode 100644 tests/models/jina_embeds.py diff --git a/Dockerfile b/Dockerfile index 1ce589ae..aa11856d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,31 +1,42 @@ + +# ================================== # Use an official Python runtime as a parent image FROM python:3.9-slim -# Set environment variables to make Python output unbuffered and disable the PIP cache +# Set environment variables ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONUNBUFFERED 1 -ENV PIP_NO_CACHE_DIR off -ENV PIP_DISABLE_PIP_VERSION_CHECK on -ENV PIP_DEFAULT_TIMEOUT 100 # Set the working directory in the container -WORKDIR /usr/src/app +WORKDIR /usr/src/swarm_cloud + + +# Install Python dependencies +# COPY requirements.txt and pyproject.toml if you're using poetry for dependency management +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt -# Copy the current directory contents into the container at /usr/src/app +# Install the 'swarms' package, assuming it's available on PyPI +RUN pip install swarms + +# Copy the rest of the application COPY . . -# Install Poetry -RUN pip install poetry +# Add entrypoint script if needed +# COPY ./entrypoint.sh . +# RUN chmod +x /usr/src/swarm_cloud/entrypoint.sh -# Disable virtualenv creation by poetry and install dependencies -RUN poetry config virtualenvs.create false -RUN poetry install --no-interaction --no-ansi +# Expose port if your application has a web interface +# EXPOSE 5000 -# Install the 'swarms' package if it's not included in the poetry.lock -RUN pip install swarms +# # Define environment variable for the swarm to work +# ENV SWARM_API_KEY=your_swarm_api_key_here -# Assuming tests require pytest to run -RUN pip install pytest +# # Add Docker CMD or ENTRYPOINT script to run the application +# CMD python your_swarm_startup_script.py +# Or use the entrypoint script if you have one +# ENTRYPOINT ["/usr/src/swarm_cloud/entrypoint.sh"] -# Run pytest on all tests in the tests directory -CMD find ./tests -name '*.py' -exec pytest {} + +# If you're using `CMD` to execute a Python script, make sure it's executable +# RUN chmod +x your_swarm_startup_script.py diff --git a/docs/docker_setup.md b/docs/docker_setup.md new file mode 100644 index 00000000..409f9119 --- /dev/null +++ b/docs/docker_setup.md @@ -0,0 +1,197 @@ +# Docker Setup Guide for Contributors to Swarms + +## Introduction + +Welcome to the `swarms` project Docker setup guide. This document will help you establish a Docker-based environment for contributing to `swarms`. Docker provides a consistent and isolated environment, ensuring that all contributors can work in the same settings, reducing the "it works on my machine" syndrome. + +### Purpose + +The purpose of this guide is to: + +- Ensure contributors can quickly set up their development environment. +- Provide a consistent testing and deployment workflow. +- Introduce Docker basics and best practices. + +### Scope + +This guide covers: + +- Installing Docker +- Cloning the `swarms` repository +- Building a Docker image +- Running the `swarms` application in a Docker container +- Running tests using Docker +- Pushing changes and working with Docker Hub + +### Audience + +This guide is intended for developers and contributors to the `swarms` project who have basic knowledge of version control with Git and programming in Python. + +## Prerequisites + +Before you begin, ensure you have: +- A GitHub account +- Git installed on your machine +- Basic command-line proficiency + +## Docker Installation + +### Windows + +1. Download Docker Desktop for Windows from the official website. +2. Install Docker Desktop, ensuring that the "Use Windows containers instead of Linux containers" option is unchecked. +3. Start Docker Desktop and wait for the Docker engine to start. + +### macOS + +1. Download Docker Desktop for macOS from the official website. +2. Follow the installation instructions, drag-and-drop Docker into the Applications folder. +3. Start Docker Desktop from the Applications folder. + +### Linux (Ubuntu) + +1. Update your package index: `sudo apt-get update`. +2. Install packages to allow apt to use a repository over HTTPS. +3. Add Docker’s official GPG key. +4. Set up the stable repository. +5. Install the latest version of Docker Engine and containerd. + +```bash +sudo apt-get install docker-ce docker-ce-cli containerd.io +``` + +6. Verify that Docker Engine is installed correctly by running the hello-world image. + +```bash +sudo docker run hello-world +``` + +### Post-installation Steps for Linux + +- Manage Docker as a non-root user. +- Configure Docker to start on boot. + +## Cloning the Repository + +```bash +git clone https://github.com/your-username/swarms.git +cd swarms +``` + +## Docker Basics + +### Dockerfile Overview + +- Explain the structure and commands of a Dockerfile used in the `swarms` project. + +### Building the Image + +```bash +docker build -t swarms-dev . +``` + +### Running a Container + +```bash +docker run -it --rm swarms-dev +``` + +## Development Workflow with Docker + +### Running the Application + +- Commands to run the `swarms` application within Docker. + +### Making Changes + +- How to make changes to the code and reflect those changes within the Docker container. + +### Running Tests + +- Instructions on running tests using `pytest` within the Docker environment. + +## Docker Compose for Local Development + +- Introduce Docker Compose and its role in simplifying multi-container setups. +- Create a `docker-compose.yml` file for the `swarms` project. + + +## Dockerfile + +Creating a Dockerfile for deploying the `swarms` framework to the cloud involves setting up the necessary environment to run your Python application, ensuring all dependencies are installed, and configuring the container to execute the desired tasks. Here's an example Dockerfile that sets up such an environment: + +```Dockerfile +# Use an official Python runtime as a parent image +FROM python:3.9-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +# Set the working directory in the container +WORKDIR /usr/src/swarm_cloud + +# Install system dependencies +RUN apt-get update \ + && apt-get -y install netcat gcc \ + && apt-get clean + +# Install Python dependencies +# COPY requirements.txt and pyproject.toml if you're using poetry for dependency management +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt + +# Install the 'swarms' package, assuming it's available on PyPI +RUN pip install swarms + +# Copy the rest of the application +COPY . . + +# Add entrypoint script if needed +# COPY ./entrypoint.sh . +# RUN chmod +x /usr/src/swarm_cloud/entrypoint.sh + +# Expose port if your application has a web interface +# EXPOSE 5000 + +# Define environment variable for the swarm to work +ENV SWARM_API_KEY=your_swarm_api_key_here + +# Add Docker CMD or ENTRYPOINT script to run the application +# CMD python your_swarm_startup_script.py +# Or use the entrypoint script if you have one +# ENTRYPOINT ["/usr/src/swarm_cloud/entrypoint.sh"] + +# If you're using `CMD` to execute a Python script, make sure it's executable +# RUN chmod +x your_swarm_startup_script.py +``` + +To build and run this Docker image: + +1. Replace `requirements.txt` with your actual requirements file or `pyproject.toml` and `poetry.lock` if you're using Poetry. +2. Replace `your_swarm_startup_script.py` with the script that starts your application. +3. If your application requires an API key or other sensitive data, make sure to set these securely, perhaps using environment variables or secrets management solutions provided by your cloud provider. +4. If you have an entrypoint script, uncomment the `COPY` and `RUN` lines for `entrypoint.sh`. +5. If your application has a web interface, uncomment the `EXPOSE` line and set it to the correct port. + +Now, build your Docker image: + +```sh +docker build -t swarm-cloud . +``` + +And run it: + +```sh +docker run -d --name my-swarm-app swarm-cloud +``` + +For deploying to the cloud, you'll need to push your Docker image to a container registry (like Docker Hub or a private registry), then pull it from your cloud environment to run it. Cloud providers often have services specifically for this purpose (like AWS ECS, GCP GKE, or Azure AKS). The deployment process will involve: + +- Pushing the image to a registry. +- Configuring cloud services to run your image. +- Setting up networking, storage, and other cloud resources. +- Monitoring, logging, and potentially scaling your containers. + +Remember to secure sensitive data, use tagged releases for your images, and follow best practices for operating in the cloud. \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 3a212201..7b331f02 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -61,6 +61,7 @@ nav: - Home: - Overview: "index.md" - Contributing: "contributing.md" + - Docker Container Setup: "docker_setup.md" - Swarms: - Overview: "swarms/index.md" - swarms.swarms: diff --git a/pyproject.toml b/pyproject.toml index c44cf9dc..9af0ab78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,6 @@ tiktoken = "*" attrs = "*" ggl = "*" ratelimit = "*" - beautifulsoup4 = "*" huggingface-hub = "*" pydantic = "*" diff --git a/requirements.txt b/requirements.txt index 5cb854b9..d28e75e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,6 @@ agent-protocol accelerate chromadb tiktoken -open-interpreter tabulate colored griptape diff --git a/demos/autotemp.py b/swarms/models/autotemp.py similarity index 98% rename from demos/autotemp.py rename to swarms/models/autotemp.py index dcde42d3..d238e117 100644 --- a/demos/autotemp.py +++ b/swarms/models/autotemp.py @@ -1,7 +1,6 @@ import re from concurrent.futures import ThreadPoolExecutor, as_completed -from swarms.models import OpenAIChat - +from swarms.models.auto_temp import OpenAIChat class AutoTempAgent: """ @@ -32,7 +31,6 @@ class AutoTempAgent: Generate a 10,000 word blog on mental clarity and the benefits of meditation. """ - def __init__( self, temperature: float = 0.5, diff --git a/tests/Dockerfile b/tests/Dockerfile new file mode 100644 index 00000000..b36c8d25 --- /dev/null +++ b/tests/Dockerfile @@ -0,0 +1,33 @@ +# TESTING +# -================== +# Use an official Python runtime as a parent image +FROM python:3.9-slim + +# Set environment variables to make Python output unbuffered and disable the PIP cache +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 +ENV PIP_NO_CACHE_DIR off +ENV PIP_DISABLE_PIP_VERSION_CHECK on +ENV PIP_DEFAULT_TIMEOUT 100 + +# Set the working directory in the container +WORKDIR /usr/src/app + +# Copy the current directory contents into the container at /usr/src/app +COPY . . + +# Install Poetry +RUN pip install poetry + +# Disable virtualenv creation by poetry and install dependencies +RUN poetry config virtualenvs.create false +RUN poetry install --no-interaction --no-ansi + +# Install the 'swarms' package if it's not included in the poetry.lock +RUN pip install swarms + +# Assuming tests require pytest to run +RUN pip install pytest + +# Run pytest on all tests in the tests directory +CMD find ./tests -name '*.py' -exec pytest {} + diff --git a/tests/models/auto_temp.py b/tests/models/auto_temp.py new file mode 100644 index 00000000..14468379 --- /dev/null +++ b/tests/models/auto_temp.py @@ -0,0 +1,76 @@ +import os +from concurrent.futures import ThreadPoolExecutor +from unittest.mock import Mock, patch + +import pytest +from dotenv import load_dotenv + +from swarms.models.autotemp import AutoTempAgent + +api_key = os.getenv("OPENAI_API_KEY") + +load_dotenv() + +@pytest.fixture +def auto_temp_agent(): + return AutoTempAgent(api_key=api_key) + + +def test_initialization(auto_temp_agent): + assert isinstance(auto_temp_agent, AutoTempAgent) + assert auto_temp_agent.auto_select is True + assert auto_temp_agent.max_workers == 6 + assert auto_temp_agent.temperature == 0.5 + assert auto_temp_agent.alt_temps == [0.4, 0.6, 0.8, 1.0, 1.2, 1.4] + + +def test_evaluate_output(auto_temp_agent): + output = "This is a test output." + with patch("swarms.models.OpenAIChat") as MockOpenAIChat: + mock_instance = MockOpenAIChat.return_value + mock_instance.return_value = "Score: 95.5" + score = auto_temp_agent.evaluate_output(output) + assert score == 95.5 + mock_instance.assert_called_once() + + +def test_run_auto_select(auto_temp_agent): + task = "Generate a blog post." + temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4" + result = auto_temp_agent.run(task, temperature_string) + assert "Best AutoTemp Output" in result + assert "Temp" in result + assert "Score" in result + + +def test_run_no_scores(auto_temp_agent): + task = "Invalid task." + temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4" + with ThreadPoolExecutor(max_workers=auto_temp_agent.max_workers) as executor: + with patch.object(executor, "submit", side_effect=[None, None, None, None, None, None]): + result = auto_temp_agent.run(task, temperature_string) + assert result == "No valid outputs generated." + + +def test_run_manual_select(auto_temp_agent): + auto_temp_agent.auto_select = False + task = "Generate a blog post." + temperature_string = "0.4,0.6,0.8,1.0,1.2,1.4" + result = auto_temp_agent.run(task, temperature_string) + assert "Best AutoTemp Output" not in result + assert "Temp" in result + assert "Score" in result + + +def test_failed_initialization(): + with pytest.raises(Exception): + AutoTempAgent() + + +def test_failed_evaluate_output(auto_temp_agent): + output = "This is a test output." + with patch("swarms.models.OpenAIChat") as MockOpenAIChat: + mock_instance = MockOpenAIChat.return_value + mock_instance.return_value = "Invalid score text" + score = auto_temp_agent.evaluate_output(output) + assert score == 0.0 diff --git a/tests/models/distill_whisper.py b/tests/models/distill_whisper.py new file mode 100644 index 00000000..6fbfccd1 --- /dev/null +++ b/tests/models/distill_whisper.py @@ -0,0 +1,154 @@ +import os +import tempfile +from functools import wraps +from unittest.mock import patch + +import numpy as np +import pytest +import torch + +from swarms.models.distill_whisperx import DistilWhisperModel, async_retry + + +@pytest.fixture +def distil_whisper_model(): + return DistilWhisperModel() + + +def create_audio_file(data: np.ndarray, sample_rate: int, file_path: str): + data.tofile(file_path) + return file_path + + +def test_initialization(distil_whisper_model): + assert isinstance(distil_whisper_model, DistilWhisperModel) + assert isinstance(distil_whisper_model.model, torch.nn.Module) + assert isinstance(distil_whisper_model.processor, torch.nn.Module) + assert distil_whisper_model.device in ["cpu", "cuda:0"] + + +def test_transcribe_audio_file(distil_whisper_model): + test_data = np.random.rand(16000) # Simulated audio data (1 second) + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file: + audio_file_path = create_audio_file(test_data, 16000, audio_file.name) + transcription = distil_whisper_model.transcribe(audio_file_path) + os.remove(audio_file_path) + + assert isinstance(transcription, str) + assert transcription.strip() != "" + + +@pytest.mark.asyncio +async def test_async_transcribe_audio_file(distil_whisper_model): + test_data = np.random.rand(16000) # Simulated audio data (1 second) + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file: + audio_file_path = create_audio_file(test_data, 16000, audio_file.name) + transcription = await distil_whisper_model.async_transcribe(audio_file_path) + os.remove(audio_file_path) + + assert isinstance(transcription, str) + assert transcription.strip() != "" + + +def test_transcribe_audio_data(distil_whisper_model): + test_data = np.random.rand(16000) # Simulated audio data (1 second) + transcription = distil_whisper_model.transcribe(test_data.tobytes()) + + assert isinstance(transcription, str) + assert transcription.strip() != "" + + +@pytest.mark.asyncio +async def test_async_transcribe_audio_data(distil_whisper_model): + test_data = np.random.rand(16000) # Simulated audio data (1 second) + transcription = await distil_whisper_model.async_transcribe(test_data.tobytes()) + + assert isinstance(transcription, str) + assert transcription.strip() != "" + + +def test_real_time_transcribe(distil_whisper_model, capsys): + test_data = np.random.rand(16000 * 5) # Simulated audio data (5 seconds) + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file: + audio_file_path = create_audio_file(test_data, 16000, audio_file.name) + + distil_whisper_model.real_time_transcribe(audio_file_path, chunk_duration=1) + + os.remove(audio_file_path) + + captured = capsys.readouterr() + assert "Starting real-time transcription..." in captured.out + assert "Chunk" in captured.out + + +def test_real_time_transcribe_audio_file_not_found(distil_whisper_model, capsys): + audio_file_path = "non_existent_audio.wav" + distil_whisper_model.real_time_transcribe(audio_file_path, chunk_duration=1) + + captured = capsys.readouterr() + assert "The audio file was not found." in captured.out + + +@pytest.fixture +def mock_async_retry(): + def _mock_async_retry(retries=3, exceptions=(Exception,), delay=1): + def decorator(func): + @wraps(func) + async def wrapper(*args, **kwargs): + return await func(*args, **kwargs) + + return wrapper + + return decorator + + with patch("distil_whisper_model.async_retry", new=_mock_async_retry()): + yield + + +@pytest.mark.asyncio +async def test_async_retry_decorator_success(): + async def mock_async_function(): + return "Success" + + decorated_function = async_retry()(mock_async_function) + result = await decorated_function() + assert result == "Success" + + +@pytest.mark.asyncio +async def test_async_retry_decorator_failure(): + async def mock_async_function(): + raise Exception("Error") + + decorated_function = async_retry()(mock_async_function) + with pytest.raises(Exception, match="Error"): + await decorated_function() + + +@pytest.mark.asyncio +async def test_async_retry_decorator_multiple_attempts(): + async def mock_async_function(): + if mock_async_function.attempts == 0: + mock_async_function.attempts += 1 + raise Exception("Error") + else: + return "Success" + + mock_async_function.attempts = 0 + decorated_function = async_retry(max_retries=2)(mock_async_function) + result = await decorated_function() + assert result == "Success" + + +def test_create_audio_file(): + test_data = np.random.rand(16000) # Simulated audio data (1 second) + sample_rate = 16000 + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file: + audio_file_path = create_audio_file(test_data, sample_rate, audio_file.name) + + assert os.path.exists(audio_file_path) + os.remove(audio_file_path) + + +if __name__ == "__main__": + pytest.main() diff --git a/tests/models/jina_embeds.py b/tests/models/jina_embeds.py new file mode 100644 index 00000000..dd102d7c --- /dev/null +++ b/tests/models/jina_embeds.py @@ -0,0 +1,82 @@ +import pytest +import torch +from swarms.models.jina_embeds import JinaEmbeddings + + +@pytest.fixture +def model(): + return JinaEmbeddings("bert-base-uncased", verbose=True) + + +def test_initialization(model): + assert isinstance(model, JinaEmbeddings) + assert model.device in ["cuda", "cpu"] + assert model.max_length == 500 + assert model.verbose is True + + +def test_run_sync(model): + task = "Encode this text" + result = model.run(task) + assert isinstance(result, torch.Tensor) + assert result.shape == (model.max_length,) + + +def test_run_async(model): + task = "Encode this text" + result = model.run_async(task) + assert isinstance(result, torch.Tensor) + assert result.shape == (model.max_length,) + + +def test_save_model(tmp_path, model): + model_path = tmp_path / "model" + model.save_model(model_path) + assert (model_path / "config.json").is_file() + assert (model_path / "pytorch_model.bin").is_file() + assert (model_path / "vocab.txt").is_file() + + +def test_gpu_available(model): + gpu_status = model.gpu_available() + if torch.cuda.is_available(): + assert gpu_status is True + else: + assert gpu_status is False + + +def test_memory_consumption(model): + memory_stats = model.memory_consumption() + if torch.cuda.is_available(): + assert "allocated" in memory_stats + assert "reserved" in memory_stats + else: + assert "error" in memory_stats + + +def test_cosine_similarity(model): + task1 = "This is a sample text for testing." + task2 = "Another sample text for testing." + embeddings1 = model.run(task1) + embeddings2 = model.run(task2) + sim = model.cos_sim(embeddings1, embeddings2) + assert isinstance(sim, torch.Tensor) + assert sim.item() >= -1.0 and sim.item() <= 1.0 + + +def test_failed_load_model(caplog): + with pytest.raises(Exception): + JinaEmbeddings("invalid_model") + assert "Failed to load the model or the tokenizer" in caplog.text + + +def test_failed_generate_text(caplog, model): + with pytest.raises(Exception): + model.run("invalid_task") + assert "Failed to generate the text" in caplog.text + + +@pytest.mark.parametrize("device", ["cuda", "cpu"]) +def test_change_device(model, device): + model.device = device + assert model.device == device