swarms/playground/agents/multi_modal_auto_agent.py

# Description: This is an example of how to use the Agent class to run a multi-modal workflow
import os
from dotenv import load_dotenv
from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.structs import Agent

# Load the environment variables (python-dotenv: reads a .env file, if present)
load_dotenv()

# Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")

# Fail early with an actionable message: os.environ.get() returns None when
# the variable is missing, and a None key would otherwise only surface later
# as an opaque failure when the model is actually called.
if not api_key:
    raise ValueError(
        "OPENAI_API_KEY is not set; export it or add it to your .env file."
    )

# Initialize the vision-capable language model
llm = GPT4VisionAPI(
    openai_api_key=api_key,
    max_tokens=500,
)

# Define the task prompt and the image the task refers to
task = "What is the color of the object?"
# NOTE(review): relative path — presumably resolved against the current
# working directory, so run the script from a directory containing images/.
img = "images/swarms.jpeg"

# Initialize the workflow: an Agent wrapping the vision model
# NOTE(review): the meaning of max_loops="auto", autosave, and dashboard is
# defined by the Agent class, not visible here — confirm before changing.
agent = Agent(
    llm=llm,
    max_loops="auto",
    autosave=True,
    dashboard=True,
    multi_modal=True,
)

# Run the workflow on the task and image, then print the agent's output
out = agent.run(task=task, img=img)
print(out)
[FEAT][Readme] 1 year ago			`# Description: This is an example of how to use the Agent class to run a multi-modal workflow`
[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt] 1 year ago			`import os`
			`from dotenv import load_dotenv`
gpt4vision api 1 year ago			`from swarms.models.gpt4_vision_api import GPT4VisionAPI`
[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt] 1 year ago			`from swarms.structs import Agent`

[FEAT][Readme] 1 year ago			`# Load the environment variables`
[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt] 1 year ago			`load_dotenv()`
gpt4vision api 1 year ago
[FEAT][Readme] 1 year ago			`# Get the API key from the environment`
[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt] 1 year ago			`api_key = os.environ.get("OPENAI_API_KEY")`
gpt4vision api 1 year ago
[FEAT][Readme] 1 year ago			`# Initialize the language model`
[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt] 1 year ago			`llm = GPT4VisionAPI(`
			`openai_api_key=api_key,`
[FEAT][Readme] 1 year ago			`max_tokens=500,`
[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt] 1 year ago			`)`
gpt4vision api 1 year ago
[FEAT][Readme] 1 year ago			`# Initialize the language model`
gpt4vision api 1 year ago			`task = "What is the color of the object?"`
			`img = "images/swarms.jpeg"`

			`## Initialize the workflow`
flow -> agent, developer swarm with prompts, maybe add the ability to create the classes 1 year ago			`agent = Agent(`
gpt4vision api 1 year ago			`llm=llm,`
tests for gpt4visionapi 1 year ago			`max_loops="auto",`
[DOCS] 1 year ago			`autosave=True,`
			`dashboard=True,`
[AbstractLLM] 1 year ago			`multi_modal=True,`
gpt4vision api 1 year ago			`)`

[FEAT][Readme] 1 year ago			`# Run the workflow on a task`
[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt] 1 year ago			`out = agent.run(task=task, img=img)`
			`print(out)`