You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
82 lines
2.0 KiB
82 lines
2.0 KiB
11 months ago
|
# Importing necessary modules
|
||
|
import os
|
||
11 months ago
|
|
||
11 months ago
|
from dotenv import load_dotenv
|
||
11 months ago
|
|
||
11 months ago
|
from swarms import Agent, OpenAIChat
|
||
9 months ago
|
from playground.memory.chroma_db import ChromaDB
|
||
11 months ago
|
from swarms.prompts.visual_cot import VISUAL_CHAIN_OF_THOUGHT
|
||
8 months ago
|
from swarms import tool
|
||
11 months ago
|
|
||
|
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Read the provider API keys from the environment; each is None when its
# variable is unset. Only the OpenAI key is used in this section.
openai_api_key = os.environ.get("OPENAI_API_KEY")
gemini_api_key = os.environ.get("GEMINI_API_KEY")

# Chat model handed to the agent as its `llm`.
llm = OpenAIChat(
    temperature=0.2,
    max_tokens=1000,
    openai_api_key=openai_api_key,
)

# ChromaDB instance later passed to the agent as its long-term memory.
memory = ChromaDB(
    output_dir="results",
    multimodal=True,
    n_results=3,
    metric="cosine",
    # docs_folder="images",
)
|
||
|
|
||
|
|
||
|
# Defining tool by creating a function and wrapping it with the @tool decorator and
|
||
|
# providing the necessary parameters and docstrings to show the usage of the tool.
|
||
|
@tool
def make_new_file(file: str, content: str):
    """
    Make a new file.

    This function creates a new file with the given name and writes the
    given content to it. The file is opened in "w" mode, so an existing
    file with the same name is overwritten.

    Parameters:
        file (str): The name of the file to be created.
        content (str): The text to write into the file.

    Returns:
        dict: A dictionary containing the status of the operation, of the
            form {"status": "success", "file": <file>}.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # Explicit UTF-8 so that writing arbitrary text does not depend on the
    # platform's default encoding.
    with open(file, "w", encoding="utf-8") as f:
        # str() keeps the original behaviour of stringifying non-str input.
        f.write(str(content))

    # Report the outcome, as the docstring has always promised.
    return {"status": "success", "file": file}
|
||
|
|
||
|
|
||
|
# Build the agent around the OpenAI chat model (`llm`) and the ChromaDB
# long-term memory, using the visual chain-of-thought prompt as its SOP.
agent = Agent(
    agent_name="Multi-Modal RAG Agent",
    agent_description=(
        "This agent fuses together the capabilities of Gemini and"
        " Visual Chain of Thought to answer questions based on the"
        " input image."
    ),
    llm=llm,
    sop=VISUAL_CHAIN_OF_THOUGHT,
    long_term_memory=memory,
    # tools=[make_new_file],
    max_loops="auto",
    autosave=True,
    verbose=True,
)

# Image to inspect and the question to ask about it.
img = "images/Screenshot_48.png"
task = (
    "What is the content of this image, return exactly what you see"
    " in the image."
)

# Execute the task against the image and show the agent's output.
out = agent.run(task=task, img=img)
print(out)
|