diff --git a/Dockerfile b/Dockerfile
index aa11856d..3fb40eb3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,42 +1,41 @@
-# ==================================
-# Use an official Python runtime as a parent image
-FROM python:3.9-slim
-
-# Set environment variables
-ENV PYTHONDONTWRITEBYTECODE 1
-ENV PYTHONUNBUFFERED 1
-
-# Set the working directory in the container
-WORKDIR /usr/src/swarm_cloud
-
-
-# Install Python dependencies
-# COPY requirements.txt and pyproject.toml if you're using poetry for dependency management
-COPY requirements.txt .
-RUN pip install --upgrade pip
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Install the 'swarms' package, assuming it's available on PyPI
-RUN pip install swarms
-
-# Copy the rest of the application
-COPY . .
-
-# Add entrypoint script if needed
-# COPY ./entrypoint.sh .
-# RUN chmod +x /usr/src/swarm_cloud/entrypoint.sh
-
-# Expose port if your application has a web interface
-# EXPOSE 5000
-
-# # Define environment variable for the swarm to work
-# ENV SWARM_API_KEY=your_swarm_api_key_here
-
-# # Add Docker CMD or ENTRYPOINT script to run the application
-# CMD python your_swarm_startup_script.py
-# Or use the entrypoint script if you have one
-# ENTRYPOINT ["/usr/src/swarm_cloud/entrypoint.sh"]
-
-# If you're using `CMD` to execute a Python script, make sure it's executable
-# RUN chmod +x your_swarm_startup_script.py
+# Use an official Python runtime as a parent image
+FROM python:3.10-slim-buster
+
+# Set the working directory in the container to /app
+WORKDIR /app
+
+# Add the current directory contents into the container at /app
+ADD . /app
+
+# Install the system libraries OpenCV and ffmpeg need, then clean the apt cache
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ffmpeg \
+        libfontconfig1 \
+        libgl1-mesa-glx \
+        libsm6 \
+        libxext6 \
+        libxrender1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip
+RUN pip3 install --upgrade pip
+
+# Install the Python dependencies, the headless OpenCV build, and supervisor
+RUN pip install --no-cache-dir -r requirements.txt opencv-python-headless supervisor
+
+# Create the supervisord config that runs both services in one container
+RUN mkdir -p /etc/supervisor/conf.d && \
+    printf '%s\n' \
+        '[supervisord]' \
+        'nodaemon=true' \
+        '[program:host_local_tools]' \
+        'command=python3 host_local_tools.py' \
+        '[program:web_demo]' \
+        'command=python3 web_demo.py' \
+        > /etc/supervisor/conf.d/supervisord.conf
+
+# Make port 80 available to the world outside this container
+EXPOSE 80
+
+# Run supervisord when the container launches
+CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 00000000..4c4fc262
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,14 @@
+version: '3'
+services:
+  web_demo:
+    build: .
+    command: python web_demo.py
+    volumes:
+      - .:/app
+    ports:
+      - "5000:5000"
+  host_local_tools:
+    build: .
+    command: python host_local_tools.py
+    volumes:
+      - .:/app
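Note that the Dockerfile EXPOSEs port 80 while the compose file publishes 5000, so whichever port `web_demo.py` actually binds must match one of them. The real `web_demo.py` is not part of this patch; the following is only a hypothetical sketch of a server consistent with the compose mapping (Flask is an assumption for illustration, not a dependency declared in requirements.txt):

```python
# Hypothetical stand-in for web_demo.py (not part of this patch): a minimal
# server that binds the port the compose file publishes. Flask is assumed
# purely for illustration; the real demo may use another framework.
from flask import Flask

app = Flask(__name__)


@app.route("/")
def health() -> str:
    return "swarms web demo is up"


if __name__ == "__main__":
    # Bind 0.0.0.0 so the server is reachable from outside the container.
    app.run(host="0.0.0.0", port=5000)
```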
diff --git a/playground/tools/multi_test.py b/playground/tools/multi_test.py
index edd3ad0d..9a09073e 100644
--- a/playground/tools/multi_test.py
+++ b/playground/tools/multi_test.py
@@ -1,4 +1,4 @@
-from bmtools.agent.tools_controller import load_valid_tools, MTQuestionAnswerer
+from swarms.tools.tools_controller import load_valid_tools, MTQuestionAnswerer
 import jsonlines
 # Choose the tools that you need
 tools_mappings = {
diff --git a/playground/tools/test.py b/playground/tools/test.py
index 8dc74815..eb381082 100644
--- a/playground/tools/test.py
+++ b/playground/tools/test.py
@@ -1,4 +1,4 @@
-from bmtools.agent.singletool import load_single_tools, STQuestionAnswerer
+from swarms.tools.singletool import load_single_tools, STQuestionAnswerer
 # Langchain
 tool_name, tool_url = 'weather', "http://127.0.0.1:8079/tools/weather/"
 
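Both playground scripts keep their BMTools-style call pattern and only change the import root. A minimal smoke test for the single-tool path might look like this; it assumes the API survives the move unchanged and that a weather tool server is already running on port 8079, as the script's URL implies:

```python
# Sketch only: assumes swarms.tools.singletool preserves the BMTools API and
# a local weather tool server is serving http://127.0.0.1:8079/tools/weather/.
from swarms.tools.singletool import load_single_tools, STQuestionAnswerer

tool_name, tool_url = "weather", "http://127.0.0.1:8079/tools/weather/"
tool_name, tool_config = load_single_tools(tool_name, tool_url)

qa = STQuestionAnswerer()
agent = qa.load_tools(tool_name, tool_config)
agent("write a weather report for San Francisco today")
```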
diff --git a/requirements.txt b/requirements.txt
index 4ce6ad76..a3ad83fe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 # faiss-gpu
+griptape
 transformers
 revChatGPT
 pandas
@@ -33,7 +34,6 @@ chromadb
 tiktoken
 tabulate
 colored
-griptape
 addict
 backoff
 ratelimit
@@ -52,7 +52,6 @@ numpy
 omegaconf
 open_clip_torch
 openai
-opencv-python
 prettytable
 safetensors
 streamlit
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index f4e43d02..62ff0a2d 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -16,7 +16,6 @@ from swarms.models.mpt import MPT7B
 
 # MultiModal Models
 from swarms.models.idefics import Idefics
-from swarms.models.kosmos_two import Kosmos
 from swarms.models.vilt import Vilt
 from swarms.models.nougat import Nougat
 from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA
@@ -42,7 +41,6 @@ __all__ = [
     "OpenAIChat",
     "Zephyr",
     "Idefics",
-    "Kosmos",
     "Vilt",
     "Nougat",
     "LayoutLMDocumentQA",
diff --git a/swarms/models/kosmos_two.py b/swarms/models/kosmos_two.py
deleted file mode 100644
index 596886f3..00000000
--- a/swarms/models/kosmos_two.py
+++ /dev/null
@@ -1,286 +0,0 @@
-import os
-
-import cv2
-import numpy as np
-import requests
-import torch
-import torchvision.transforms as T
-from PIL import Image
-from transformers import AutoModelForVision2Seq, AutoProcessor
-
-
-# utils
-def is_overlapping(rect1, rect2):
-    x1, y1, x2, y2 = rect1
-    x3, y3, x4, y4 = rect2
-    return not (x2 < x3 or x1 > x4 or y2 < y3 or y1 > y4)
-
-
-class Kosmos:
-    """
-
-    Args:
-
-
-    # Initialize Kosmos
-    kosmos = Kosmos()
-
-    # Perform multimodal grounding
-    kosmos.multimodal_grounding("Find the red apple in the image.", "https://example.com/apple.jpg")
-
-    # Perform referring expression comprehension
-    kosmos.referring_expression_comprehension("Show me the green bottle.", "https://example.com/bottle.jpg")
-
-    # Generate referring expressions
-    kosmos.referring_expression_generation("It is on the table.", "https://example.com/table.jpg")
-
-    # Perform grounded visual question answering
-    kosmos.grounded_vqa("What is the color of the car?", "https://example.com/car.jpg")
-
-    # Generate grounded image caption
-    kosmos.grounded_image_captioning("https://example.com/beach.jpg")
-    """
-
-    def __init__(
-        self,
-        model_name="ydshieh/kosmos-2-patch14-224",
-    ):
-        self.model = AutoModelForVision2Seq.from_pretrained(
-            model_name, trust_remote_code=True
-        )
-        self.processor = AutoProcessor.from_pretrained(
-            model_name, trust_remote_code=True
-        )
-
-    def get_image(self, url):
-        """Image"""
-        return Image.open(requests.get(url, stream=True).raw)
-
-    def run(self, prompt, image):
-        """Run Kosmos"""
-        inputs = self.processor(text=prompt, images=image, return_tensors="pt")
-        generated_ids = self.model.generate(
-            pixel_values=inputs["pixel_values"],
-            input_ids=inputs["input_ids"][:, :-1],
-            attention_mask=inputs["attention_mask"][:, :-1],
-            img_features=None,
-            img_attn_mask=inputs["img_attn_mask"][:, :-1],
-            use_cache=True,
-            max_new_tokens=64,
-        )
-        generated_texts = self.processor.batch_decode(
-            generated_ids,
-            skip_special_tokens=True,
-        )[0]
-        processed_text, entities = self.processor.post_process_generation(
-            generated_texts
-        )
-
-    def __call__(self, prompt, image):
-        """Run call"""
-        inputs = self.processor(text=prompt, images=image, return_tensors="pt")
-        generated_ids = self.model.generate(
-            pixel_values=inputs["pixel_values"],
-            input_ids=inputs["input_ids"][:, :-1],
-            attention_mask=inputs["attention_mask"][:, :-1],
-            img_features=None,
-            img_attn_mask=inputs["img_attn_mask"][:, :-1],
-            use_cache=True,
-            max_new_tokens=64,
-        )
-        generated_texts = self.processor.batch_decode(
-            generated_ids,
-            skip_special_tokens=True,
-        )[0]
-        processed_text, entities = self.processor.post_process_generation(
-            generated_texts
-        )
-
-    # tasks
-    def multimodal_grounding(self, phrase, image_url):
-        prompt = f"<grounding><phrase> {phrase} </phrase>"
-        self.run(prompt, image_url)
-
-    def referring_expression_comprehension(self, phrase, image_url):
-        prompt = f"<grounding><phrase> {phrase} </phrase>"
-        self.run(prompt, image_url)
-
-    def referring_expression_generation(self, phrase, image_url):
-        prompt = (
-            "<grounding><phrase>"
-            " It is</phrase>"
-        )
-        self.run(prompt, image_url)
-
-    def grounded_vqa(self, question, image_url):
-        prompt = f"<grounding> Question: {question} Answer:"
-        self.run(prompt, image_url)
-
-    def grounded_image_captioning(self, image_url):
-        prompt = "<grounding> An image of"
-        self.run(prompt, image_url)
-
-    def grounded_image_captioning_detailed(self, image_url):
-        prompt = "<grounding> Describe this image in detail"
-        self.run(prompt, image_url)
-
-    def draw_entity_boxes_on_image(image, entities, show=False, save_path=None):
-        """_summary_
-        Args:
-            image (_type_): image or image path
-            collect_entity_location (_type_): _description_
-        """
-        if isinstance(image, Image.Image):
-            image_h = image.height
-            image_w = image.width
-            image = np.array(image)[:, :, [2, 1, 0]]
-        elif isinstance(image, str):
-            if os.path.exists(image):
-                pil_img = Image.open(image).convert("RGB")
-                image = np.array(pil_img)[:, :, [2, 1, 0]]
-                image_h = pil_img.height
-                image_w = pil_img.width
-            else:
-                raise ValueError(f"invaild image path, {image}")
-        elif isinstance(image, torch.Tensor):
-            # pdb.set_trace()
-            image_tensor = image.cpu()
-            reverse_norm_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073])[
-                :, None, None
-            ]
-            reverse_norm_std = torch.tensor([0.26862954, 0.26130258, 0.27577711])[
-                :, None, None
-            ]
-            image_tensor = image_tensor * reverse_norm_std + reverse_norm_mean
-            pil_img = T.ToPILImage()(image_tensor)
-            image_h = pil_img.height
-            image_w = pil_img.width
-            image = np.array(pil_img)[:, :, [2, 1, 0]]
-        else:
-            raise ValueError(f"invaild image format, {type(image)} for {image}")
-
-        if len(entities) == 0:
-            return image
-
-        new_image = image.copy()
-        previous_bboxes = []
-        # size of text
-        text_size = 1
-        # thickness of text
-        text_line = 1  # int(max(1 * min(image_h, image_w) / 512, 1))
-        box_line = 3
-        (c_width, text_height), _ = cv2.getTextSize(
-            "F", cv2.FONT_HERSHEY_COMPLEX, text_size, text_line
-        )
-        base_height = int(text_height * 0.675)
-        text_offset_original = text_height - base_height
-        text_spaces = 3
-
-        for entity_name, (start, end), bboxes in entities:
-            for x1_norm, y1_norm, x2_norm, y2_norm in bboxes:
-                orig_x1, orig_y1, orig_x2, orig_y2 = (
-                    int(x1_norm * image_w),
-                    int(y1_norm * image_h),
-                    int(x2_norm * image_w),
-                    int(y2_norm * image_h),
-                )
-                # draw bbox
-                # random color
-                color = tuple(np.random.randint(0, 255, size=3).tolist())
-                new_image = cv2.rectangle(
-                    new_image, (orig_x1, orig_y1), (orig_x2, orig_y2), color, box_line
-                )
-
-                l_o, r_o = (
-                    box_line // 2 + box_line % 2,
-                    box_line // 2 + box_line % 2 + 1,
-                )
-
-                x1 = orig_x1 - l_o
-                y1 = orig_y1 - l_o
-
-                if y1 < text_height + text_offset_original + 2 * text_spaces:
-                    y1 = (
-                        orig_y1
-                        + r_o
-                        + text_height
-                        + text_offset_original
-                        + 2 * text_spaces
-                    )
-                    x1 = orig_x1 + r_o
-
-                # add text background
-                (text_width, text_height), _ = cv2.getTextSize(
-                    f"  {entity_name}", cv2.FONT_HERSHEY_COMPLEX, text_size, text_line
-                )
-                text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2 = (
-                    x1,
-                    y1 - (text_height + text_offset_original + 2 * text_spaces),
-                    x1 + text_width,
-                    y1,
-                )
-
-                for prev_bbox in previous_bboxes:
-                    while is_overlapping(
-                        (text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), prev_bbox
-                    ):
-                        text_bg_y1 += (
-                            text_height + text_offset_original + 2 * text_spaces
-                        )
-                        text_bg_y2 += (
-                            text_height + text_offset_original + 2 * text_spaces
-                        )
-                        y1 += text_height + text_offset_original + 2 * text_spaces
-
-                        if text_bg_y2 >= image_h:
-                            text_bg_y1 = max(
-                                0,
-                                image_h
-                                - (
-                                    text_height + text_offset_original + 2 * text_spaces
-                                ),
-                            )
-                            text_bg_y2 = image_h
-                            y1 = image_h
-                            break
-
-                alpha = 0.5
-                for i in range(text_bg_y1, text_bg_y2):
-                    for j in range(text_bg_x1, text_bg_x2):
-                        if i < image_h and j < image_w:
-                            if j < text_bg_x1 + 1.35 * c_width:
-                                # original color
-                                bg_color = color
-                            else:
-                                # white
-                                bg_color = [255, 255, 255]
-                            new_image[i, j] = (
-                                alpha * new_image[i, j]
-                                + (1 - alpha) * np.array(bg_color)
-                            ).astype(np.uint8)
-
-                cv2.putText(
-                    new_image,
-                    f"  {entity_name}",
-                    (x1, y1 - text_offset_original - 1 * text_spaces),
-                    cv2.FONT_HERSHEY_COMPLEX,
-                    text_size,
-                    (0, 0, 0),
-                    text_line,
-                    cv2.LINE_AA,
-                )
-                # previous_locations.append((x1, y1))
-                previous_bboxes.append((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2))
-
-        pil_image = Image.fromarray(new_image[:, :, [2, 1, 0]])
-        if save_path:
-            pil_image.save(save_path)
-        if show:
-            pil_image.show()
-
-        return new_image
-
-    def generate_boxees(self, prompt, image_url):
-        image = self.get_image(image_url)
-        processed_text, entities = self.process_prompt(prompt, image)
-        self.draw_entity_boxes_on_image(image, entities, show=True)
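With `Kosmos` removed from `swarms.models`, any downstream code still importing it needs migrating. A sketch of one migration path, calling Kosmos-2 through transformers directly; the model id, prompt format, and `generate()` arguments are taken from the deleted module, and the image URL is a placeholder:

```python
# Migration sketch, not part of the patch: call Kosmos-2 via transformers
# directly. All call details mirror the deleted swarms/models/kosmos_two.py.
import requests
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

model_name = "ydshieh/kosmos-2-patch14-224"
model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Placeholder URL, as in the deleted module's docstring examples.
image = Image.open(requests.get("https://example.com/beach.jpg", stream=True).raw)

inputs = processor(text="<grounding> An image of", images=image, return_tensors="pt")
generated_ids = model.generate(
    pixel_values=inputs["pixel_values"],
    input_ids=inputs["input_ids"][:, :-1],
    attention_mask=inputs["attention_mask"][:, :-1],
    img_features=None,
    img_attn_mask=inputs["img_attn_mask"][:, :-1],
    use_cache=True,
    max_new_tokens=64,
)
text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
processed_text, entities = processor.post_process_generation(text)
print(processed_text, entities)
```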
diff --git a/swarms/tools/agent/apitool.py b/swarms/tools/agent/apitool.py
index 59990d50..650cc109 100644
--- a/swarms/tools/agent/apitool.py
+++ b/swarms/tools/agent/apitool.py
@@ -10,7 +10,7 @@ import aiohttp
 import http.client
 
 http.client._MAXLINE = 655360
 
-from bmtools import get_logger
+from swarms.utils import get_logger
 
 logger = get_logger(__name__)
diff --git a/swarms/tools/agent/executor.py b/swarms/tools/agent/executor.py
index a5b2e8c8..56085cf3 100644
--- a/swarms/tools/agent/executor.py
+++ b/swarms/tools/agent/executor.py
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Tuple, Union
 from langchain.agents import AgentExecutor
 from langchain.input import get_color_mapping
 from langchain.schema import AgentAction, AgentFinish
-from bmtools.agent.translator import Translator
+from .translator import Translator
 
 
 class AgentExecutorWithTranslation(AgentExecutor):
@@ -111,4 +111,4 @@ class Executor(AgentExecutorWithTranslation):
             raise e
         self.callback_manager.on_chain_end(output, verbose=self.verbose)
         # return self.prep_outputs(inputs, output, return_only_outputs)
-        return output
\ No newline at end of file
+        return output
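The logger factory moves from `bmtools` to `swarms.utils` here. A quick sanity check that the rewired import behaves like the old one; this is a sketch assuming `swarms.utils` exposes `get_logger` exactly as this patch's import implies:

```python
# Sketch only: verifies the renamed logging helper resolves and logs.
from swarms.utils import get_logger

logger = get_logger(__name__)
logger.info("apitool logger wired up after the bmtools -> swarms rename")
```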
diff --git a/swarms/tools/agent/singletool.py b/swarms/tools/agent/singletool.py
index 624389da..0dcf9009 100644
--- a/swarms/tools/agent/singletool.py
+++ b/swarms/tools/agent/singletool.py
@@ -6,11 +6,11 @@ import json
 import os
 import requests
 import yaml
-from bmtools.agent.apitool import RequestTool
-from bmtools.agent.executor import Executor, AgentExecutorWithTranslation
-from bmtools import get_logger
-from bmtools.agent.BabyagiTools import BabyAGI
-# from bmtools.models.customllm import CustomLLM
+from swarms.tools.agent.apitool import RequestTool
+from swarms.tools.agent.executor import Executor, AgentExecutorWithTranslation
+from swarms.tools import get_logger
+from swarms.tools.agent.BabyagiTools import BabyAGI
+# from swarms.tools.models.customllm import CustomLLM
 
 
 logger = get_logger(__name__)
diff --git a/swarms/tools/apitool.py b/swarms/tools/apitool.py
index 59990d50..650cc109 100644
--- a/swarms/tools/apitool.py
+++ b/swarms/tools/apitool.py
@@ -10,7 +10,7 @@ import aiohttp
 import http.client
 
 http.client._MAXLINE = 655360
 
-from bmtools import get_logger
+from swarms.utils import get_logger
 
 logger = get_logger(__name__)
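For the multi-tool path exercised by playground/tools/multi_test.py, a corresponding smoke test might look like the following; the `MTQuestionAnswerer` constructor signature and `build_runner()` call are assumptions carried over from the upstream BMTools API this code was forked from:

```python
# Sketch only: assumes the BMTools-style multi-tool API survives the move,
# an OPENAI_API_KEY is set, and a local weather tool server is running.
import os

from swarms.tools.tools_controller import load_valid_tools, MTQuestionAnswerer

tools_mappings = {
    "weather": "http://127.0.0.1:8079/tools/weather/",
}
tools = load_valid_tools(tools_mappings)

qa = MTQuestionAnswerer(openai_api_key=os.environ["OPENAI_API_KEY"], all_tools=tools)
agent = qa.build_runner()
agent("Do I need an umbrella in Berlin today?")
```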