pull/58/head
Kye 1 year ago
parent 90f71b2fa7
commit 0b43d59ce9

@@ -169,42 +169,42 @@ def compile(task: str):
 # mm model workers
-# import torch
-# from PIL import Image
-# from transformers import (
-#     BlipForQuestionAnswering,
-#     BlipProcessor,
-# )
-# @tool
-# def VQAinference(self, inputs):
-#     """
-#     Answer Question About The Image, VQA Multi-Modal Worker agent
-#     description="useful when you need an answer for a question based on an image. "
-#     "like: what is the background color of the last image, how many cats in this figure, what is in this figure. "
-#     "The input to this tool should be a comma separated string of two, representing the image_path and the question",
-#     """
-#     device = "cuda:0"
-#     torch_dtype = torch.float16 if "cuda" in device else torch.float32
-#     processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-#     model = BlipForQuestionAnswering.from_pretrained(
-#         "Salesforce/blip-vqa-base", torch_dtype=torch_dtype
-#     ).to(device)
-#     image_path, question = inputs.split(",")
-#     raw_image = Image.open(image_path).convert("RGB")
-#     inputs = processor(raw_image, question, return_tensors="pt").to(
-#         device, torch_dtype
-#     )
-#     out = model.generate(**inputs)
-#     answer = processor.decode(out[0], skip_special_tokens=True)
-#     logger.debug(
-#         f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, "
-#         f"Output Answer: {answer}"
-#     )
-#     return answer
+import torch
+from PIL import Image
+from transformers import (
+    BlipForQuestionAnswering,
+    BlipProcessor,
+)
+@tool
+def VQAinference(self, inputs):
+    """
+    Answer Question About The Image, VQA Multi-Modal Worker agent
+    description="useful when you need an answer for a question based on an image. "
+    "like: what is the background color of the last image, how many cats in this figure, what is in this figure. "
+    "The input to this tool should be a comma separated string of two, representing the image_path and the question",
+    """
+    device = "cuda:0"
+    torch_dtype = torch.float16 if "cuda" in device else torch.float32
+    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
+    model = BlipForQuestionAnswering.from_pretrained(
+        "Salesforce/blip-vqa-base", torch_dtype=torch_dtype
+    ).to(device)
+    image_path, question = inputs.split(",")
+    raw_image = Image.open(image_path).convert("RGB")
+    inputs = processor(raw_image, question, return_tensors="pt").to(
+        device, torch_dtype
+    )
+    out = model.generate(**inputs)
+    answer = processor.decode(out[0], skip_special_tokens=True)
+    logger.debug(
+        f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, "
+        f"Output Answer: {answer}"
+    )
+    return answer
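For reference, the re-enabled worker takes one comma-separated string holding the image path and the question. Assuming the @tool decorator exposes LangChain's usual BaseTool.run interface, an invocation might look roughly like the sketch below; the image path is a placeholder, and a CUDA-capable GPU is assumed since the body hard-codes "cuda:0".

# Hypothetical call; "examples/cats.png" is a placeholder path, not a file in the repo.
answer = VQAinference.run("examples/cats.png, how many cats are in this figure?")
print(answer)  # decoded answer string produced by BLIP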

@@ -9,7 +9,7 @@ from langchain_experimental.autonomous_agents import AutoGPT
 from swarms.tools.autogpt import (
     ReadFileTool,
-    # VQAinference,
+    VQAinference,
     WriteFileTool,
     compile,
     process_csv,

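With the import restored, VQAinference can sit alongside the existing file and CSV tools in the list this module hands to the AutoGPT agent. The line below is only an illustrative sketch; the actual agent construction is outside this diff, and ReadFileTool/WriteFileTool are shown with their default arguments.

# Sketch: the VQA worker joins the existing toolset passed to AutoGPT.
tools = [ReadFileTool(), WriteFileTool(), process_csv, compile, VQAinference]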