pull/58/head
Kye 1 year ago
parent 9b5414fbc4
commit cc041dcf77

@@ -1,7 +1,7 @@
from swarms.agents import MultiModalAgent
load_dict = {
"ImageCaptioning": "cuda"
"ImageCaptioning": "cpu"
}
node = MultiModalAgent(load_dict)
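For reference, a minimal sketch of driving the agent after the cuda-to-cpu switch; run_text is an assumed entry point based on the swarms examples, not something this diff confirms:

from swarms.agents import MultiModalAgent

# "cpu" keeps the captioning model loadable on machines without a GPU
load_dict = {"ImageCaptioning": "cpu"}
node = MultiModalAgent(load_dict)

# run_text is an assumption about the agent's API; adjust if it differs
response = node.run_text("Describe the image at ./images/example.png")
print(response)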

@@ -28,6 +28,32 @@ colored
addict
albumentations
basicsr
controlnet-aux
diffusers
einops
imageio
imageio-ffmpeg
invisible-watermark
kornia
numpy
omegaconf
open_clip_torch
openai
opencv-python
prettytable
safetensors
streamlit
test-tube
timm
torchmetrics
transformers
webdataset
yapf
mkdocs
mkdocs-material
mkdocs-glightbox

@@ -1,5 +1,4 @@
import os
import gradio as gr
import random
import torch
import cv2

@@ -17,12 +17,11 @@ from langchain.chains.qa_with_sources.loading import BaseCombineDocumentsChain
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import BaseTool
-from langchain.tools.file_management.read import ReadFileTool
-from langchain.tools.file_management.write import WriteFileTool
from pydantic import Field
from swarms.utils.logger import logger
+from langchain.tools.file_management.write import WriteFileTool
+from langchain.tools.file_management.read import ReadFileTool
@contextmanager
@@ -141,6 +140,7 @@ query_website_tool = WebpageQATool(qa_chain=load_qa_with_sources_chain(llm))
# code_intepret = CodeInterpreter()
+import interpreter
@tool
def compile(task: str):
"""
@@ -169,41 +169,42 @@ def compile(task: str):
# mm model workers
-import torch
-from PIL import Image
-from transformers import (
-    BlipForQuestionAnswering,
-    BlipProcessor,
-)
-@tool
-def VQAinference(self, inputs):
-    """
-    Answer Question About The Image, VQA Multi-Modal Worker agent
-    description="useful when you need an answer for a question based on an image. "
-    "like: what is the background color of the last image, how many cats in this figure, what is in this figure. "
-    "The input to this tool should be a comma separated string of two, representing the image_path and the question",
+# import torch
+# from PIL import Image
+# from transformers import (
+#     BlipForQuestionAnswering,
+#     BlipProcessor,
+# )
+# @tool
+# def VQAinference(self, inputs):
+#     """
+#     Answer Question About The Image, VQA Multi-Modal Worker agent
+#     description="useful when you need an answer for a question based on an image. "
+#     "like: what is the background color of the last image, how many cats in this figure, what is in this figure. "
+#     "The input to this tool should be a comma separated string of two, representing the image_path and the question",
"""
device = "cuda:0"
torch_dtype = torch.float16 if "cuda" in device else torch.float32
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained(
"Salesforce/blip-vqa-base", torch_dtype=torch_dtype
).to(device)
image_path, question = inputs.split(",")
raw_image = Image.open(image_path).convert("RGB")
inputs = processor(raw_image, question, return_tensors="pt").to(
device, torch_dtype
)
out = model.generate(**inputs)
answer = processor.decode(out[0], skip_special_tokens=True)
logger.debug(
f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, "
f"Output Answer: {answer}"
)
return answer
# """
# device = "cuda:0"
# torch_dtype = torch.float16 if "cuda" in device else torch.float32
# processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
# model = BlipForQuestionAnswering.from_pretrained(
# "Salesforce/blip-vqa-base", torch_dtype=torch_dtype
# ).to(device)
# image_path, question = inputs.split(",")
# raw_image = Image.open(image_path).convert("RGB")
# inputs = processor(raw_image, question, return_tensors="pt").to(
# device, torch_dtype
# )
# out = model.generate(**inputs)
# answer = processor.decode(out[0], skip_special_tokens=True)
# logger.debug(
# f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, "
# f"Output Answer: {answer}"
# )
# return answer
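For anyone who still needs the disabled worker, the same calls can run standalone; this sketch mirrors the commented-out code above, with a CPU fallback added so it loads without a GPU:

import torch
from PIL import Image
from transformers import BlipForQuestionAnswering, BlipProcessor

def vqa_inference(inputs: str) -> str:
    """Answer a question about an image; inputs is 'image_path,question'."""
    # fall back to CPU when no GPU is available (the removed code hardcoded cuda:0)
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if "cuda" in device else torch.float32
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained(
        "Salesforce/blip-vqa-base", torch_dtype=torch_dtype
    ).to(device)
    image_path, question = inputs.split(",", 1)
    raw_image = Image.open(image_path.strip()).convert("RGB")
    encoded = processor(raw_image, question.strip(), return_tensors="pt").to(
        device, torch_dtype
    )
    out = model.generate(**encoded)
    return processor.decode(out[0], skip_special_tokens=True)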

@@ -174,7 +174,7 @@ class Worker:
query_website_tool,
HumanInputRun(),
compile,
-VQAinference
+# VQAinference
]
if external_tools is not None:
self.tools.extend(external_tools)
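The external_tools hook above means callers can re-add capabilities like VQA without touching the default list; a small sketch, assuming Worker accepts the tools via an external_tools keyword as this hunk suggests:

from langchain.tools import tool

@tool
def word_count(text: str) -> str:
    """Count the words in a piece of text."""
    return str(len(text.split()))

# appended to the default tool set by self.tools.extend(external_tools)
worker = Worker(external_tools=[word_count])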
