pull/55/head
Kye 1 year ago
parent c46c9c00a7
commit 84dd4d2aca

@@ -10,19 +10,17 @@ from langchain.agents import tool
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.docstore.document import Document
from langchain.memory.chat_message_histories import FileChatMessageHistory
from langchain.tools.human.tool import HumanInputRun
from langchain.chains.qa_with_sources.loading import BaseCombineDocumentsChain
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import BaseTool, DuckDuckGoSearchRun
from langchain.tools.file_management.read import ReadFileTool
from langchain.tools.file_management.write import WriteFileTool
from pydantic import Field
from swarms.utils.logger import logger

ROOT_DIR = "./data/"
llm = ChatOpenAI(model_name="gpt-4", temperature=1.0)
@@ -161,3 +159,48 @@ def compile(task: str):
    interpreter.chat()
    interpreter.reset()
# multi-modal model workers
import os

import torch
from PIL import Image
from transformers import (
    BlipForQuestionAnswering,
    BlipProcessor,
)
@tool
def VQAinference(inputs: str) -> str:
    """
    Answer a question about an image (VQA multi-modal worker agent).

    Useful when you need an answer to a question based on an image,
    e.g. "what is the background color of the last image", "how many
    cats are in this figure", "what is in this figure".

    The input to this tool should be a comma-separated string of two
    values, representing the image_path and the question.
    """
    # Prefer GPU when available; fall back to CPU (and float32) otherwise.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if "cuda" in device else torch.float32

    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained(
        "Salesforce/blip-vqa-base", torch_dtype=torch_dtype
    ).to(device)

    image_path, question = inputs.split(",")
    raw_image = Image.open(image_path).convert("RGB")
    inputs = processor(raw_image, question, return_tensors="pt").to(device, torch_dtype)

    out = model.generate(**inputs)
    answer = processor.decode(out[0], skip_special_tokens=True)

    logger.debug(
        f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, "
        f"Input Question: {question}, Output Answer: {answer}"
    )

    return answer
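A minimal usage sketch for the tool above (the image path and question are hypothetical placeholders; since @tool wraps the function as a LangChain tool, it is invoked through .run):

# Hypothetical invocation; "examples/cat.png" is a placeholder path.
answer = VQAinference.run("examples/cat.png, how many cats are in this image?")
print(answer)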

@@ -12,7 +12,8 @@ from swarms.tools.autogpt import (
    process_csv,
    # web_search,
    query_website_tool,
    compile
    compile,
    VQAinference
)
from swarms.utils.decorators import error_decorator, log_decorator, timing_decorator
@@ -79,7 +80,8 @@ class Worker:
            # email
            # pdf
            # Tool(name="Goal Decomposition Tool", func=todo_chain.run, description="Use Case: Decompose ambitious goals into as many explicit and well defined tasks for an AI agent to follow. Rules and Regulations, don't use this tool too often only in the beginning when the user grants you a mission."),
            compile
            compile,
            VQAinference
        ]
        if external_tools is not None:
            self.tools.extend(external_tools)
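For context, a hedged sketch of how external tools might be supplied to the worker (the Worker constructor usage and the word_count tool are illustrative assumptions, not part of this diff):

from langchain.agents import tool

@tool
def word_count(text: str) -> str:
    """Count the words in a piece of text."""
    # Placeholder tool, used only to illustrate the external_tools hook.
    return str(len(text.split()))

worker = Worker(external_tools=[word_count])  # assumes Worker accepts external_tools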
