From b4a85eb83e8001511afd96024f5bcc340032766d Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 3 Oct 2023 16:44:57 -0400 Subject: [PATCH] clean up Former-commit-id: 4ff7d1661ea810aa98c16ae9654bcbd96a166f83 --- swarms/agents/multi_modal_visual_agent.py | 73 ++++++++++------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/swarms/agents/multi_modal_visual_agent.py b/swarms/agents/multi_modal_visual_agent.py index 616c67fb..83e9dcb0 100644 --- a/swarms/agents/multi_modal_visual_agent.py +++ b/swarms/agents/multi_modal_visual_agent.py @@ -1,58 +1,45 @@ -# coding: utf-8 -import argparse -import inspect -import math import os +import gradio as gr import random +import torch +import cv2 import re import uuid - -import cv2 -import gradio as gr -import matplotlib.pyplot as plt +from PIL import Image, ImageDraw, ImageOps, ImageFont +import math import numpy as np -import torch -import wget -from controlnet_aux import HEDdetector, MLSDdetector, OpenposeDetector -from diffusers import ( - ControlNetModel, - EulerAncestralDiscreteScheduler, - StableDiffusionControlNetPipeline, - StableDiffusionInpaintPipeline, - StableDiffusionInstructPix2PixPipeline, - StableDiffusionPipeline, - UniPCMultistepScheduler, -) +import argparse +import inspect +import tempfile +from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation +from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration, BlipForQuestionAnswering +from transformers import AutoImageProcessor, UperNetForSemanticSegmentation + +from diffusers import StableDiffusionPipeline, StableDiffusionInpaintPipeline, StableDiffusionInstructPix2PixPipeline +from diffusers import EulerAncestralDiscreteScheduler +from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker + +from controlnet_aux import OpenposeDetector, MLSDdetector, HEDdetector + from langchain.agents.initialize import initialize_agent from langchain.agents.tools import Tool from langchain.chains.conversation.memory import ConversationBufferMemory from langchain.llms.openai import OpenAI -from PIL import Image, ImageDraw, ImageFont, ImageOps -from transformers import ( - BlipForConditionalGeneration, - BlipForQuestionAnswering, - BlipProcessor, - pipeline, -) # Grounding DINO -# import groundingdino.datasets.transforms as T -from swarms.workers.models import ( - Compose, - Normalize, - RandomResize, - SLConfig, - ToTensor, - build_model, - clean_state_dict, - get_phrases_from_posmap, -) -from swarms.workers.models.segment_anything import ( - SamAutomaticMaskGenerator, - SamPredictor, - build_sam, -) +import groundingdino.datasets.transforms as T +from groundingdino.models import build_model +from groundingdino.util import box_ops +from groundingdino.util.slconfig import SLConfig +from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap + +# segment anything +from segment_anything import build_sam, SamPredictor, SamAutomaticMaskGenerator +import cv2 +import numpy as np +import matplotlib.pyplot as plt +import wget VISUAL_AGENT_PREFIX = """ Worker Multi-Modal Agent is designed to be able to assist with