diff --git a/README.md b/README.md index a1b9772e..4320259c 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,10 @@ llm = OpenAIChat( ) -## Initialize the workflow +## Initialize the Agent agent = Agent(llm=llm, max_loops=1, dashboard=True) -# Run the workflow on a task +# Run the Agent on a task out = agent.run("Generate a 10,000 word blog on health and wellness.") @@ -129,14 +129,25 @@ for task in workflow.tasks: - Run the agent with multiple modalities useful for various real-world tasks in manufacturing, logistics, and health. ```python -from swarms.structs import Agent +# Description: This is an example of how to use the Agent class to run a multi-modal workflow +import os +from dotenv import load_dotenv from swarms.models.gpt4_vision_api import GPT4VisionAPI -from swarms.prompts.multi_modal_autonomous_instruction_prompt import ( - MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, -) +from swarms.structs import Agent + +# Load the environment variables +load_dotenv() -llm = GPT4VisionAPI() +# Get the API key from the environment +api_key = os.environ.get("OPENAI_API_KEY") +# Initialize the language model +llm = GPT4VisionAPI( + openai_api_key=api_key, + max_tokens=500, +) + +# Initialize the task task = ( "Analyze this image of an assembly line and identify any issues such as" " misaligned parts, defects, or deviations from the standard assembly" @@ -148,13 +159,15 @@ img = "assembly_line.jpg" ## Initialize the workflow agent = Agent( llm=llm, - max_loops='auto' - sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, + max_loops="auto", + autosave=True, dashboard=True, + multi_modal=True ) -agent.run(task=task, img=img) - +# Run the workflow on a task +out = agent.run(task=task, img=img) +print(out) ``` diff --git a/multi_modal_auto_agent.py b/multi_modal_auto_agent.py index e0fd7f06..fc1e9459 100644 --- a/multi_modal_auto_agent.py +++ b/multi_modal_auto_agent.py @@ -1,21 +1,22 @@ +# Description: This is an example of how to use the Agent class to run a multi-modal workflow 
import os - from dotenv import load_dotenv - from swarms.models.gpt4_vision_api import GPT4VisionAPI -from swarms.prompts.multi_modal_autonomous_instruction_prompt import ( - MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, -) from swarms.structs import Agent +# Load the environment variables load_dotenv() +# Get the API key from the environment api_key = os.environ.get("OPENAI_API_KEY") +# Initialize the language model llm = GPT4VisionAPI( openai_api_key=api_key, + max_tokens=500, ) +# Initialize the task task = "What is the color of the object?" img = "images/swarms.jpeg" @@ -23,10 +24,11 @@ img = "images/swarms.jpeg" agent = Agent( llm=llm, max_loops="auto", - sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, autosave=True, dashboard=True, + multi_modal=True ) +# Run the workflow on a task out = agent.run(task=task, img=img) print(out)