feat: Dockerize

Former-commit-id: 6374c5ef4fd0dd084e5e23b476c45416d90dbcd8
pull/186/head
Zack 2 years ago
parent 5ab96fb0f9
commit 5717c71924

@@ -1,42 +1,47 @@
-# ==================================
-# Use an official Python runtime as a parent image
-FROM python:3.9-slim
-
-# Set environment variables
-ENV PYTHONDONTWRITEBYTECODE 1
-ENV PYTHONUNBUFFERED 1
-
-# Set the working directory in the container
-WORKDIR /usr/src/swarm_cloud
-
-# Install Python dependencies
-# COPY requirements.txt and pyproject.toml if you're using poetry for dependency management
-COPY requirements.txt .
-RUN pip install --upgrade pip
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Install the 'swarms' package, assuming it's available on PyPI
-RUN pip install swarms
-
-# Copy the rest of the application
-COPY . .
-
-# Add entrypoint script if needed
-# COPY ./entrypoint.sh .
-# RUN chmod +x /usr/src/swarm_cloud/entrypoint.sh
-
-# Expose port if your application has a web interface
-# EXPOSE 5000
-
-# # Define environment variable for the swarm to work
-# ENV SWARM_API_KEY=your_swarm_api_key_here
-
-# # Add Docker CMD or ENTRYPOINT script to run the application
-# CMD python your_swarm_startup_script.py
-# Or use the entrypoint script if you have one
-# ENTRYPOINT ["/usr/src/swarm_cloud/entrypoint.sh"]
-
-# If you're using `CMD` to execute a Python script, make sure it's executable
-# RUN chmod +x your_swarm_startup_script.py
+# Use an official Python runtime as a parent image
+FROM python:3.10-slim-buster
+
+# Set the working directory in the container to /app
+WORKDIR /app
+
+# Add the current directory contents into the container at /app
+ADD . /app
+
+# Install Python and the system libraries OpenCV and ffmpeg need at runtime
+RUN apt-get update && apt-get install -y \
+        python3-pip \
+        ffmpeg \
+        libsm6 \
+        libxext6 \
+        libfontconfig1 \
+        libxrender1 \
+        libgl1-mesa-glx \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip and install the headless OpenCV bindings
+RUN pip3 install --upgrade pip && pip install opencv-python-headless
+
+# Install any needed packages specified in requirements.txt, plus supervisor
+RUN pip install --no-cache-dir -r requirements.txt supervisor
+
+# Create the necessary directory and supervisord.conf
+RUN mkdir -p /etc/supervisor/conf.d && \
+    printf '[supervisord]\nnodaemon=true\n\n[program:host_local_tools]\ncommand=python3 host_local_tools.py\n\n[program:web_demo]\ncommand=python3 web_demo.py\n' > /etc/supervisor/conf.d/supervisord.conf
+
+# Make port 80 available to the world outside this container
+EXPOSE 80
+
+# Run supervisord when the container launches
+CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

@ -0,0 +1,14 @@
version: '3'
services:
web_demo:
build: .
command: python web_demo.py
volumes:
- .:/app
ports:
- "5000:5000"
host_local_tools:
build: .
command: python host_local_tools.py
volumes:
- .:/app
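With both the Dockerfile and this compose file in place, `docker compose up --build` builds the image once and starts the two services together. Because each service sets its own `command:`, the supervisord `CMD` baked into the image is overridden here and only takes effect under a plain `docker run`; the `.:/app` bind mount also means host-side code edits are visible in the running containers without a rebuild.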

@ -1,4 +1,4 @@
from bmtools.agent.tools_controller import load_valid_tools, MTQuestionAnswerer from swarms.tools.tools_controller import load_valid_tools, MTQuestionAnswerer
import jsonlines import jsonlines
# Choose the tools that you need # Choose the tools that you need
tools_mappings = { tools_mappings = {
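For reference, the renamed swarms.tools.tools_controller module keeps the BMTools multi-tool flow: load_valid_tools resolves a name-to-URL mapping into tool configs, and MTQuestionAnswerer builds one agent over all of them. A minimal sketch following the upstream BMTools usage pattern; the constructor and builder details (openai_api_key, build_runner) are assumptions carried over from BMTools, not shown in this hunk:

    from swarms.tools.tools_controller import load_valid_tools, MTQuestionAnswerer

    # Name -> endpoint mapping; this endpoint matches the weather tool
    # used in the singletool example below.
    tools_mappings = {
        "weather": "http://127.0.0.1:8079/tools/weather/",
    }

    tools = load_valid_tools(tools_mappings)
    qa = MTQuestionAnswerer(openai_api_key="", all_tools=tools)  # assumed signature
    agent = qa.build_runner()
    agent("What is the weather in Berlin today?")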

@ -1,4 +1,4 @@
from bmtools.agent.singletool import load_single_tools, STQuestionAnswerer from swarms.tools.singletool import load_single_tools, STQuestionAnswerer
# Langchain # Langchain
tool_name, tool_url = 'weather', "http://127.0.0.1:8079/tools/weather/" tool_name, tool_url = 'weather', "http://127.0.0.1:8079/tools/weather/"
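The single-tool path mirrors this: load_single_tools fetches the tool's config from a running tool server, and STQuestionAnswerer wraps it in an agent. A sketch under the same assumption about the inherited BMTools API (load_tools is the assumed method name):

    from swarms.tools.singletool import load_single_tools, STQuestionAnswerer

    tool_name, tool_url = "weather", "http://127.0.0.1:8079/tools/weather/"

    # Fetch the tool's config from the local tool server (it must be running).
    tools_name, tools_config = load_single_tools(tool_name, tool_url)

    qa = STQuestionAnswerer()  # assumed to read OPENAI_API_KEY from the environment
    agent = qa.load_tools(tools_name, tools_config)
    agent("Is it raining in London right now?")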

@@ -1,4 +1,5 @@
 # faiss-gpu
+griptape
 transformers
 revChatGPT
 pandas
@@ -33,7 +34,6 @@ chromadb
 tiktoken
 tabulate
 colored
-griptape
 addict
 backoff
 ratelimit
@@ -52,7 +52,6 @@ numpy
 omegaconf
 open_clip_torch
 openai
-opencv-python
 prettytable
 safetensors
 streamlit

@@ -16,7 +16,6 @@ from swarms.models.mpt import MPT7B
 
 # MultiModal Models
 from swarms.models.idefics import Idefics
-from swarms.models.kosmos_two import Kosmos
 from swarms.models.vilt import Vilt
 from swarms.models.nougat import Nougat
 from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA
@@ -42,7 +41,6 @@ __all__ = [
     "OpenAIChat",
     "Zephyr",
     "Idefics",
-    "Kosmos",
     "Vilt",
     "Nougat",
     "LayoutLMDocumentQA",

@@ -1,286 +0,0 @@
import os

import cv2
import numpy as np
import requests
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor


# utils
def is_overlapping(rect1, rect2):
    """Return True if two (x1, y1, x2, y2) rectangles overlap."""
    x1, y1, x2, y2 = rect1
    x3, y3, x4, y4 = rect2
    return not (x2 < x3 or x1 > x4 or y2 < y3 or y1 > y4)


class Kosmos:
    """
    Example usage:

    # Initialize Kosmos
    kosmos = Kosmos()

    # Perform multimodal grounding
    kosmos.multimodal_grounding("Find the red apple in the image.", "https://example.com/apple.jpg")

    # Perform referring expression comprehension
    kosmos.referring_expression_comprehension("Show me the green bottle.", "https://example.com/bottle.jpg")

    # Generate referring expressions
    kosmos.referring_expression_generation("It is on the table.", "https://example.com/table.jpg")

    # Perform grounded visual question answering
    kosmos.grounded_vqa("What is the color of the car?", "https://example.com/car.jpg")

    # Generate grounded image caption
    kosmos.grounded_image_captioning("https://example.com/beach.jpg")
    """

    def __init__(
        self,
        model_name="ydshieh/kosmos-2-patch14-224",
    ):
        self.model = AutoModelForVision2Seq.from_pretrained(
            model_name, trust_remote_code=True
        )
        self.processor = AutoProcessor.from_pretrained(
            model_name, trust_remote_code=True
        )

    def get_image(self, url):
        """Fetch an image from a URL as a PIL image."""
        return Image.open(requests.get(url, stream=True).raw)

    def run(self, prompt, image):
        """Run Kosmos on a prompt and a PIL image; return the processed text and entities."""
        inputs = self.processor(text=prompt, images=image, return_tensors="pt")
        generated_ids = self.model.generate(
            pixel_values=inputs["pixel_values"],
            input_ids=inputs["input_ids"][:, :-1],
            attention_mask=inputs["attention_mask"][:, :-1],
            img_features=None,
            img_attn_mask=inputs["img_attn_mask"][:, :-1],
            use_cache=True,
            max_new_tokens=64,
        )
        generated_texts = self.processor.batch_decode(
            generated_ids,
            skip_special_tokens=True,
        )[0]
        processed_text, entities = self.processor.post_process_generation(
            generated_texts
        )
        return processed_text, entities

    def __call__(self, prompt, image):
        """Alias for run()."""
        return self.run(prompt, image)

    # tasks
    def multimodal_grounding(self, phrase, image_url):
        prompt = f"<grounding><phrase> {phrase} </phrase>"
        return self.run(prompt, self.get_image(image_url))

    def referring_expression_comprehension(self, phrase, image_url):
        prompt = f"<grounding><phrase> {phrase} </phrase>"
        return self.run(prompt, self.get_image(image_url))

    def referring_expression_generation(self, phrase, image_url):
        prompt = (
            "<grounding><phrase>"
            " It</phrase><object><patch_index_0044><patch_index_0863></object> is"
        )
        return self.run(prompt, self.get_image(image_url))

    def grounded_vqa(self, question, image_url):
        prompt = f"<grounding> Question: {question} Answer:"
        return self.run(prompt, self.get_image(image_url))

    def grounded_image_captioning(self, image_url):
        prompt = "<grounding> An image of"
        return self.run(prompt, self.get_image(image_url))

    def grounded_image_captioning_detailed(self, image_url):
        prompt = "<grounding> Describe this image in detail"
        return self.run(prompt, self.get_image(image_url))

    def draw_entity_boxes_on_image(self, image, entities, show=False, save_path=None):
        """Draw a labeled bounding box on the image for each detected entity.

        Args:
            image: a PIL image, an image path, or a normalized torch tensor
            entities: (entity_name, (start, end), bboxes) tuples as returned by run()
        """
        if isinstance(image, Image.Image):
            image_h = image.height
            image_w = image.width
            image = np.array(image)[:, :, [2, 1, 0]]
        elif isinstance(image, str):
            if os.path.exists(image):
                pil_img = Image.open(image).convert("RGB")
                image = np.array(pil_img)[:, :, [2, 1, 0]]
                image_h = pil_img.height
                image_w = pil_img.width
            else:
                raise ValueError(f"invalid image path, {image}")
        elif isinstance(image, torch.Tensor):
            image_tensor = image.cpu()
            reverse_norm_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073])[
                :, None, None
            ]
            reverse_norm_std = torch.tensor([0.26862954, 0.26130258, 0.27577711])[
                :, None, None
            ]
            image_tensor = image_tensor * reverse_norm_std + reverse_norm_mean
            pil_img = T.ToPILImage()(image_tensor)
            image_h = pil_img.height
            image_w = pil_img.width
            image = np.array(pil_img)[:, :, [2, 1, 0]]
        else:
            raise ValueError(f"invalid image format, {type(image)} for {image}")

        if len(entities) == 0:
            return image

        new_image = image.copy()
        previous_bboxes = []
        # size of text
        text_size = 1
        # thickness of text
        text_line = 1  # int(max(1 * min(image_h, image_w) / 512, 1))
        box_line = 3
        (c_width, text_height), _ = cv2.getTextSize(
            "F", cv2.FONT_HERSHEY_COMPLEX, text_size, text_line
        )
        base_height = int(text_height * 0.675)
        text_offset_original = text_height - base_height
        text_spaces = 3

        for entity_name, (start, end), bboxes in entities:
            for x1_norm, y1_norm, x2_norm, y2_norm in bboxes:
                orig_x1, orig_y1, orig_x2, orig_y2 = (
                    int(x1_norm * image_w),
                    int(y1_norm * image_h),
                    int(x2_norm * image_w),
                    int(y2_norm * image_h),
                )
                # draw bbox in a random color
                color = tuple(np.random.randint(0, 255, size=3).tolist())
                new_image = cv2.rectangle(
                    new_image, (orig_x1, orig_y1), (orig_x2, orig_y2), color, box_line
                )

                l_o, r_o = (
                    box_line // 2 + box_line % 2,
                    box_line // 2 + box_line % 2 + 1,
                )

                x1 = orig_x1 - l_o
                y1 = orig_y1 - l_o

                if y1 < text_height + text_offset_original + 2 * text_spaces:
                    y1 = (
                        orig_y1
                        + r_o
                        + text_height
                        + text_offset_original
                        + 2 * text_spaces
                    )
                    x1 = orig_x1 + r_o

                # add text background
                (text_width, text_height), _ = cv2.getTextSize(
                    f" {entity_name}", cv2.FONT_HERSHEY_COMPLEX, text_size, text_line
                )
                text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2 = (
                    x1,
                    y1 - (text_height + text_offset_original + 2 * text_spaces),
                    x1 + text_width,
                    y1,
                )

                # shift the label down until it no longer overlaps earlier labels
                for prev_bbox in previous_bboxes:
                    while is_overlapping(
                        (text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), prev_bbox
                    ):
                        text_bg_y1 += (
                            text_height + text_offset_original + 2 * text_spaces
                        )
                        text_bg_y2 += (
                            text_height + text_offset_original + 2 * text_spaces
                        )
                        y1 += text_height + text_offset_original + 2 * text_spaces

                        if text_bg_y2 >= image_h:
                            text_bg_y1 = max(
                                0,
                                image_h
                                - (
                                    text_height + text_offset_original + 2 * text_spaces
                                ),
                            )
                            text_bg_y2 = image_h
                            y1 = image_h
                            break

                # alpha-blend the label background onto the image
                alpha = 0.5
                for i in range(text_bg_y1, text_bg_y2):
                    for j in range(text_bg_x1, text_bg_x2):
                        if i < image_h and j < image_w:
                            if j < text_bg_x1 + 1.35 * c_width:
                                # original color
                                bg_color = color
                            else:
                                # white
                                bg_color = [255, 255, 255]
                            new_image[i, j] = (
                                alpha * new_image[i, j]
                                + (1 - alpha) * np.array(bg_color)
                            ).astype(np.uint8)

                cv2.putText(
                    new_image,
                    f" {entity_name}",
                    (x1, y1 - text_offset_original - 1 * text_spaces),
                    cv2.FONT_HERSHEY_COMPLEX,
                    text_size,
                    (0, 0, 0),
                    text_line,
                    cv2.LINE_AA,
                )
                previous_bboxes.append((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2))

        pil_image = Image.fromarray(new_image[:, :, [2, 1, 0]])
        if save_path:
            pil_image.save(save_path)
        if show:
            pil_image.show()

        return new_image

    def generate_boxes(self, prompt, image_url):
        """Run the model on the prompt and draw the detected entity boxes."""
        image = self.get_image(image_url)
        processed_text, entities = self.run(prompt, image)
        self.draw_entity_boxes_on_image(image, entities, show=True)

@@ -10,7 +10,7 @@ import aiohttp
 import http.client
 
 http.client._MAXLINE = 655360
 
-from bmtools import get_logger
+from swarms.utils import get_logger
 
 logger = get_logger(__name__)

@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Tuple, Union
 from langchain.agents import AgentExecutor
 from langchain.input import get_color_mapping
 from langchain.schema import AgentAction, AgentFinish
-from bmtools.agent.translator import Translator
+from .translator import Translator
 
 
 class AgentExecutorWithTranslation(AgentExecutor):

@@ -6,11 +6,12 @@ import json
 import os
 import requests
 import yaml
 
-from bmtools.agent.apitool import RequestTool
-from bmtools.agent.executor import Executor, AgentExecutorWithTranslation
-from bmtools import get_logger
-from bmtools.agent.BabyagiTools import BabyAGI
-# from bmtools.models.customllm import CustomLLM
+from swarms.tools.agent.apitool import RequestTool
+from swarms.tools.agent.executor import Executor, AgentExecutorWithTranslation
+from swarms.tools import get_logger
+from swarms.tools.agent.BabyagiTools import BabyAGI
+# from swarms.tools
+#.models.customllm import CustomLLM
 
 logger = get_logger(__name__)

@@ -10,7 +10,7 @@ import aiohttp
 import http.client
 
 http.client._MAXLINE = 655360
 
-from bmtools import get_logger
+from swarms.utils import get_logger
 
 logger = get_logger(__name__)
