feat: Dockerize

Former-commit-id: 6374c5ef4fd0dd084e5e23b476c45416d90dbcd8
pull/186/head
Zack 2 years ago
parent 5ab96fb0f9
commit 5717c71924

@ -1,42 +1,47 @@
# ==================================
# Use an official Python runtime as a parent image
FROM python:3.9-slim
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Set the working directory in the container
WORKDIR /usr/src/swarm_cloud
# Install Python dependencies
# Copy requirements.txt (or pyproject.toml instead, if you're using Poetry for dependency management)
COPY requirements.txt .
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt
# Install the 'swarms' package, assuming it's available on PyPI
RUN pip install swarms
# Copy the rest of the application
COPY . .
# Add entrypoint script if needed
# COPY ./entrypoint.sh .
# RUN chmod +x /usr/src/swarm_cloud/entrypoint.sh
# Expose port if your application has a web interface
# EXPOSE 5000
# # Define environment variable for the swarm to work
# ENV SWARM_API_KEY=your_swarm_api_key_here
# # Add Docker CMD or ENTRYPOINT script to run the application
# CMD python your_swarm_startup_script.py
# Or use the entrypoint script if you have one
# ENTRYPOINT ["/usr/src/swarm_cloud/entrypoint.sh"]
# If you're using `CMD` to execute a Python script, make sure it's executable
# RUN chmod +x your_swarm_startup_script.py
# Use an official Python runtime as a parent image (CPU-only; swap in an NVIDIA CUDA base image for GPU support)
FROM python:3.10-slim-buster
# Set the working directory in the container to /app
WORKDIR /app
# Add the current directory contents into the container at /app
ADD . /app
# Install the system libraries OpenCV needs (libGL, X11 helpers, fontconfig, ffmpeg), then clean up the apt cache.
# libGL.so.1 is provided by libgl1-mesa-glx, so no manual symlink is required,
# and pip already ships with the python:3.10-slim base image.
RUN apt-get update && apt-get install -y \
    libsm6 \
    libxext6 \
    ffmpeg \
    libfontconfig1 \
    libxrender1 \
    libgl1-mesa-glx \
    && rm -rf /var/lib/apt/lists/*
# OpenCV itself comes from PyPI as the headless build (no GUI toolkit required)
RUN pip install --no-cache-dir opencv-python-headless
# Upgrade pip
RUN pip3 install --upgrade pip
# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt supervisor
# Create the necessary directory and supervisord.conf
RUN mkdir -p /etc/supervisor/conf.d && \
    printf '%s\n' \
    "[supervisord]" \
    "nodaemon=true" \
    "[program:host_local_tools]" \
    "command=python3 host_local_tools.py" \
    "[program:web_demo]" \
    "command=python3 web_demo.py" \
    > /etc/supervisor/conf.d/supervisord.conf
# Make port 80 available to the world outside this container
EXPOSE 80
# Run supervisord when the container launches
CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

@ -0,0 +1,14 @@
version: '3'
services:
  web_demo:
    build: .
    command: python web_demo.py
    volumes:
      - .:/app
    ports:
      - "5000:5000"
  host_local_tools:
    build: .
    command: python host_local_tools.py
    volumes:
      - .:/app

@ -1,4 +1,4 @@
from bmtools.agent.tools_controller import load_valid_tools, MTQuestionAnswerer
from swarms.tools.tools_controller import load_valid_tools, MTQuestionAnswerer
import jsonlines
# Choose the tools that you need
tools_mappings = {

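The hunk above only swaps the import path from bmtools to swarms.tools; for context, a minimal sketch of how the multi-tool controller is typically driven. The constructor arguments and build_runner call follow the BMTools-style API and are assumptions, not part of this diff:

from swarms.tools.tools_controller import load_valid_tools, MTQuestionAnswerer

# Map each tool name to the server hosting its spec (assumed local endpoint)
tools_mappings = {
    "weather": "http://127.0.0.1:8079/tools/weather/",
}

tools = load_valid_tools(tools_mappings)                     # load every reachable tool
qa = MTQuestionAnswerer(openai_api_key="", all_tools=tools)  # assumed BMTools-style signature
agent = qa.build_runner()                                    # build an agent over the loaded tools
agent("What is the weather in Berlin today?")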
@ -1,4 +1,4 @@
from bmtools.agent.singletool import load_single_tools, STQuestionAnswerer
from swarms.tools.singletool import load_single_tools, STQuestionAnswerer
# Langchain
tool_name, tool_url = 'weather', "http://127.0.0.1:8079/tools/weather/"

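Likewise for the single-tool path, a minimal sketch under the same assumption that the BMTools-style STQuestionAnswerer interface carries over unchanged after the rename:

from swarms.tools.singletool import load_single_tools, STQuestionAnswerer

tool_name, tool_url = "weather", "http://127.0.0.1:8079/tools/weather/"
tool_name, tool_config = load_single_tools(tool_name, tool_url)  # fetch the tool's spec from the local server

qa = STQuestionAnswerer()
agent = qa.load_tools(tool_name, tool_config)  # assumed BMTools-style signature
agent("Write a short weather report for San Francisco today.")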
@ -1,4 +1,5 @@
# faiss-gpu
griptape
transformers
revChatGPT
pandas
@ -33,7 +34,6 @@ chromadb
tiktoken
tabulate
colored
griptape
addict
backoff
ratelimit
@ -52,7 +52,6 @@ numpy
omegaconf
open_clip_torch
openai
opencv-python
prettytable
safetensors
streamlit

@ -16,7 +16,6 @@ from swarms.models.mpt import MPT7B
# MultiModal Models
from swarms.models.idefics import Idefics
from swarms.models.kosmos_two import Kosmos
from swarms.models.vilt import Vilt
from swarms.models.nougat import Nougat
from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA
@ -42,7 +41,6 @@ __all__ = [
"OpenAIChat",
"Zephyr",
"Idefics",
"Kosmos",
"Vilt",
"Nougat",
"LayoutLMDocumentQA",

@ -1,286 +0,0 @@
import os
import cv2
import numpy as np
import requests
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor
# utils
def is_overlapping(rect1, rect2):
x1, y1, x2, y2 = rect1
x3, y3, x4, y4 = rect2
return not (x2 < x3 or x1 > x4 or y2 < y3 or y1 > y4)
class Kosmos:
"""
Example:
# Initialize Kosmos
kosmos = Kosmos()
# Perform multimodal grounding
kosmos.multimodal_grounding("Find the red apple in the image.", "https://example.com/apple.jpg")
# Perform referring expression comprehension
kosmos.referring_expression_comprehension("Show me the green bottle.", "https://example.com/bottle.jpg")
# Generate referring expressions
kosmos.referring_expression_generation("It is on the table.", "https://example.com/table.jpg")
# Perform grounded visual question answering
kosmos.grounded_vqa("What is the color of the car?", "https://example.com/car.jpg")
# Generate grounded image caption
kosmos.grounded_image_captioning("https://example.com/beach.jpg")
"""
def __init__(
self,
model_name="ydshieh/kosmos-2-patch14-224",
):
self.model = AutoModelForVision2Seq.from_pretrained(
model_name, trust_remote_code=True
)
self.processor = AutoProcessor.from_pretrained(
model_name, trust_remote_code=True
)
def get_image(self, url):
"""Fetch an image from a URL and return it as a PIL Image"""
return Image.open(requests.get(url, stream=True).raw)
def run(self, prompt, image):
"""Run Kosmos"""
inputs = self.processor(text=prompt, images=image, return_tensors="pt")
generated_ids = self.model.generate(
pixel_values=inputs["pixel_values"],
input_ids=inputs["input_ids"][:, :-1],
attention_mask=inputs["attention_mask"][:, :-1],
img_features=None,
img_attn_mask=inputs["img_attn_mask"][:, :-1],
use_cache=True,
max_new_tokens=64,
)
generated_texts = self.processor.batch_decode(
generated_ids,
skip_special_tokens=True,
)[0]
processed_text, entities = self.processor.post_process_generation(
generated_texts
)
return processed_text, entities
def __call__(self, prompt, image):
"""Run call"""
inputs = self.processor(text=prompt, images=image, return_tensors="pt")
generated_ids = self.model.generate(
pixel_values=inputs["pixel_values"],
input_ids=inputs["input_ids"][:, :-1],
attention_mask=inputs["attention_mask"][:, :-1],
img_features=None,
img_attn_mask=inputs["img_attn_mask"][:, :-1],
use_cache=True,
max_new_tokens=64,
)
generated_texts = self.processor.batch_decode(
generated_ids,
skip_special_tokens=True,
)[0]
processed_text, entities = self.processor.post_process_generation(
generated_texts
)
return processed_text, entities
# tasks
def multimodal_grounding(self, phrase, image_url):
prompt = f"<grounding><phrase> {phrase} </phrase>"
self.run(prompt, image_url)
def referring_expression_comprehension(self, phrase, image_url):
prompt = f"<grounding><phrase> {phrase} </phrase>"
self.run(prompt, image_url)
def referring_expression_generation(self, phrase, image_url):
prompt = (
"<grounding><phrase>"
" It</phrase><object><patch_index_0044><patch_index_0863></object> is"
)
self.run(prompt, image_url)
def grounded_vqa(self, question, image_url):
prompt = f"<grounding> Question: {question} Answer:"
self.run(prompt, image_url)
def grounded_image_captioning(self, image_url):
prompt = "<grounding> An image of"
self.run(prompt, image_url)
def grounded_image_captioning_detailed(self, image_url):
prompt = "<grounding> Describe this image in detail"
self.run(prompt, image_url)
def draw_entity_boxes_on_image(self, image, entities, show=False, save_path=None):
"""Draw bounding boxes and labels for the grounded entities onto an image
Args:
image: a PIL Image, a path to an image file, or a normalized image tensor
entities: (entity_name, (start, end), bboxes) tuples as returned by the processor
"""
if isinstance(image, Image.Image):
image_h = image.height
image_w = image.width
image = np.array(image)[:, :, [2, 1, 0]]
elif isinstance(image, str):
if os.path.exists(image):
pil_img = Image.open(image).convert("RGB")
image = np.array(pil_img)[:, :, [2, 1, 0]]
image_h = pil_img.height
image_w = pil_img.width
else:
raise ValueError(f"invalid image path, {image}")
elif isinstance(image, torch.Tensor):
# pdb.set_trace()
image_tensor = image.cpu()
reverse_norm_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073])[
:, None, None
]
reverse_norm_std = torch.tensor([0.26862954, 0.26130258, 0.27577711])[
:, None, None
]
image_tensor = image_tensor * reverse_norm_std + reverse_norm_mean
pil_img = T.ToPILImage()(image_tensor)
image_h = pil_img.height
image_w = pil_img.width
image = np.array(pil_img)[:, :, [2, 1, 0]]
else:
raise ValueError(f"invalid image format, {type(image)} for {image}")
if len(entities) == 0:
return image
new_image = image.copy()
previous_bboxes = []
# size of text
text_size = 1
# thickness of text
text_line = 1 # int(max(1 * min(image_h, image_w) / 512, 1))
box_line = 3
(c_width, text_height), _ = cv2.getTextSize(
"F", cv2.FONT_HERSHEY_COMPLEX, text_size, text_line
)
base_height = int(text_height * 0.675)
text_offset_original = text_height - base_height
text_spaces = 3
for entity_name, (start, end), bboxes in entities:
for x1_norm, y1_norm, x2_norm, y2_norm in bboxes:
orig_x1, orig_y1, orig_x2, orig_y2 = (
int(x1_norm * image_w),
int(y1_norm * image_h),
int(x2_norm * image_w),
int(y2_norm * image_h),
)
# draw bbox
# random color
color = tuple(np.random.randint(0, 255, size=3).tolist())
new_image = cv2.rectangle(
new_image, (orig_x1, orig_y1), (orig_x2, orig_y2), color, box_line
)
l_o, r_o = (
box_line // 2 + box_line % 2,
box_line // 2 + box_line % 2 + 1,
)
x1 = orig_x1 - l_o
y1 = orig_y1 - l_o
if y1 < text_height + text_offset_original + 2 * text_spaces:
y1 = (
orig_y1
+ r_o
+ text_height
+ text_offset_original
+ 2 * text_spaces
)
x1 = orig_x1 + r_o
# add text background
(text_width, text_height), _ = cv2.getTextSize(
f" {entity_name}", cv2.FONT_HERSHEY_COMPLEX, text_size, text_line
)
text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2 = (
x1,
y1 - (text_height + text_offset_original + 2 * text_spaces),
x1 + text_width,
y1,
)
for prev_bbox in previous_bboxes:
while is_overlapping(
(text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), prev_bbox
):
text_bg_y1 += (
text_height + text_offset_original + 2 * text_spaces
)
text_bg_y2 += (
text_height + text_offset_original + 2 * text_spaces
)
y1 += text_height + text_offset_original + 2 * text_spaces
if text_bg_y2 >= image_h:
text_bg_y1 = max(
0,
image_h
- (
text_height + text_offset_original + 2 * text_spaces
),
)
text_bg_y2 = image_h
y1 = image_h
break
alpha = 0.5
for i in range(text_bg_y1, text_bg_y2):
for j in range(text_bg_x1, text_bg_x2):
if i < image_h and j < image_w:
if j < text_bg_x1 + 1.35 * c_width:
# original color
bg_color = color
else:
# white
bg_color = [255, 255, 255]
new_image[i, j] = (
alpha * new_image[i, j]
+ (1 - alpha) * np.array(bg_color)
).astype(np.uint8)
cv2.putText(
new_image,
f" {entity_name}",
(x1, y1 - text_offset_original - 1 * text_spaces),
cv2.FONT_HERSHEY_COMPLEX,
text_size,
(0, 0, 0),
text_line,
cv2.LINE_AA,
)
# previous_locations.append((x1, y1))
previous_bboxes.append((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2))
pil_image = Image.fromarray(new_image[:, :, [2, 1, 0]])
if save_path:
pil_image.save(save_path)
if show:
pil_image.show()
return new_image
def generate_boxes(self, prompt, image_url):
"""Run the grounding prompt on an image and draw the detected entity boxes"""
image = self.get_image(image_url)
processed_text, entities = self.run(prompt, image)
self.draw_entity_boxes_on_image(image, entities, show=True)
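For reference, a minimal end-to-end sketch of the grounding pipeline this removed file wires together; the image URL is hypothetical, and it assumes run returns the processed text and entities as above:

kosmos = Kosmos()
image = kosmos.get_image("https://example.com/apple.jpg")  # hypothetical image URL
processed_text, entities = kosmos.run("<grounding> An image of", image)
kosmos.draw_entity_boxes_on_image(image, entities, show=False, save_path="annotated.jpg")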

@ -10,7 +10,7 @@ import aiohttp
import http.client
http.client._MAXLINE = 655360
from bmtools import get_logger
from swarms.utils import get_logger
logger = get_logger(__name__)

@ -4,7 +4,7 @@ from typing import Any, Dict, List, Tuple, Union
from langchain.agents import AgentExecutor
from langchain.input import get_color_mapping
from langchain.schema import AgentAction, AgentFinish
from bmtools.agent.translator import Translator
from .translator import Translator
class AgentExecutorWithTranslation(AgentExecutor):

@ -6,11 +6,12 @@ import json
import os
import requests
import yaml
from bmtools.agent.apitool import RequestTool
from bmtools.agent.executor import Executor, AgentExecutorWithTranslation
from bmtools import get_logger
from bmtools.agent.BabyagiTools import BabyAGI
# from bmtools.models.customllm import CustomLLM
from swarms.tools.agent.apitool import RequestTool
from swarms.tools.agent.executor import Executor, AgentExecutorWithTranslation
from swarms.tools import get_logger
from swarms.tools.agent.BabyagiTools import BabyAGI
# from swarms.tools.models.customllm import CustomLLM
logger = get_logger(__name__)

@ -10,7 +10,7 @@ import aiohttp
import http.client
http.client._MAXLINE = 655360
from bmtools import get_logger
from swarms.utils import get_logger
logger = get_logger(__name__)
