[DEMO][Swarm of MultiModalityRobots][sequential_workflow with images

pull/197/head
Kye 1 year ago
parent a56b0b69b9
commit b457511bde

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

@ -0,0 +1,129 @@
"""
Swarm of multi modal autonomous agents for manufacturing!
---------------------------------------------------------
Health Security agent: Agent that monitors the health of working conditions: input image of factory output: health safety index 0.0 - 1.0 being the highest
Quality Control agent: Agent that monitors the quality of the product: input image of product output: quality index 0.0 - 1.0 being the highest
Productivity agent: Agent that monitors the productivity of the factory: input image of factory output: productivity index 0.0 - 1.0 being the highest
Safety agent: Agent that monitors the safety of the factory: input image of factory output: safety index 0.0 - 1.0 being the highest
Security agent: Agent that monitors the security of the factory: input image of factory output: security index 0.0 - 1.0 being the highest
Sustainability agent: Agent that monitors the sustainability of the factory: input image of factory output: sustainability index 0.0 - 1.0 being the highest
Efficiency agent: Agent that monitors the efficiency of the factory: input image of factory output: efficiency index 0.0 - 1.0 being the highest
Flow:
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
"""
from swarms.structs import Flow, SequentialWorkflow
import os
from dotenv import load_dotenv
from swarms.models import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
# Load environment variables (optionally from a local .env file) so the
# OpenAI key does not have to be hard-coded in the demo.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# One shared vision-capable model instance drives every agent below.
llm = GPT4VisionAPI(openai_api_key=api_key)

# Input images, one per step of the agent chain.
assembly_line = "playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg"
red_robots = "playground/demos/swarm_of_mma_manufacturing/red_robots.jpg"
robots = "playground/demos/swarm_of_mma_manufacturing/robots.jpg"
tesla_assembly_line = "playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg"

# Define detailed prompts for each agent
tasks = {
    "health_safety": (
        "Analyze the factory's working environment for health safety. Focus on"
        " cleanliness, ventilation, spacing between workstations, and personal"
        " protective equipment availability."
    ),
    "productivity": (
        "Review the factory's workflow efficiency, machine utilization, and"
        " employee engagement. Identify operational delays or bottlenecks."
    ),
    "safety": (
        "Analyze the factory's safety measures, including fire exits, safety"
        " signage, and emergency response equipment."
    ),
    "security": (
        "Evaluate the factory's security systems, entry/exit controls, and"
        " potential vulnerabilities."
    ),
    "sustainability": (
        "Inspect the factory's sustainability practices, including waste"
        " management, energy usage, and eco-friendly processes."
    ),
    "efficiency": (
        "Assess the manufacturing process's efficiency, considering the layout,"
        " logistics, and automation level."
    ),
}

# Define prompts for each agent
health_safety_prompt = tasks["health_safety"]
productivity_prompt = tasks["productivity"]
safety_prompt = tasks["safety"]
security_prompt = tasks["security"]
sustainability_prompt = tasks["sustainability"]
efficiency_prompt = tasks["efficiency"]

# NOTE: Flow.__init__ overwrites `sop` with
# MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 whenever `multi_modal` is truthy, and
# then overwrites it again with "\n".join(sop_list) when `sop_list` is given.
# Passing both prompts through `sop_list` (as a real list, not a bare string,
# which join() would split into single characters) keeps the multi-modal
# instructions AND the agent-specific instructions.

# Health and safety agent
health_security_agent = Flow(
    llm=llm,
    sop_list=[MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, health_safety_prompt],
    max_loops=2,
    multi_modal=True,
)

# Productivity agent
productivity_check_agent = Flow(
    llm=llm,
    sop_list=[MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, productivity_prompt],
    max_loops=2,
    multi_modal=True,
)

# Security agent
security_check_agent = Flow(
    llm=llm,
    sop_list=[MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, security_prompt],
    max_loops=2,
    multi_modal=True,
)

# Efficiency agent
efficiency_check_agent = Flow(
    llm=llm,
    sop_list=[MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, efficiency_prompt],
    max_loops=2,
    multi_modal=True,
)

# Step 1: the health and safety agent inspects the first image.
health_check = health_security_agent.run(
    "Analyze the safety of this factory",
    robots,
)

# Step 2: the productivity agent receives the previous agent's output as its
# task text, together with the next image.
productivity_check = productivity_check_agent.run(
    health_check,
    assembly_line,
)

# Step 3: the security agent continues the chain. The agents are executed
# directly here, so this must be `run` like every other step.
security_check = security_check_agent.run(
    productivity_check,
    red_robots,
)

# Step 4: the efficiency agent produces the final output of the chain.
efficiency_check = efficiency_check_agent.run(
    security_check,
    tesla_assembly_line,
)

@ -13,3 +13,125 @@ Efficiency agent: Agent that monitors the efficiency of the factory: input image
Flow:
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
"""
from swarms.structs import Flow, SequentialWorkflow
from swarms.models import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
# One shared vision-capable model instance drives every agent below.
llm = GPT4VisionAPI()

# Input images, one per workflow step (paths are relative to the working
# directory the demo is launched from).
assembly_line = "assembly_line.jpg"
red_robots = "red_robots.jpg"
robots = "robots.jpg"
tesla_assembly_line = "tesla_assembly.jpg"

# Define detailed prompts for each agent
tasks = {
    "health_safety": (
        "Analyze the factory's working environment for health safety. Focus on"
        " cleanliness, ventilation, spacing between workstations, and personal"
        " protective equipment availability."
    ),
    "productivity": (
        "Review the factory's workflow efficiency, machine utilization, and"
        " employee engagement. Identify operational delays or bottlenecks."
    ),
    "safety": (
        "Analyze the factory's safety measures, including fire exits, safety"
        " signage, and emergency response equipment."
    ),
    "security": (
        "Evaluate the factory's security systems, entry/exit controls, and"
        " potential vulnerabilities."
    ),
    "sustainability": (
        "Inspect the factory's sustainability practices, including waste"
        " management, energy usage, and eco-friendly processes."
    ),
    "efficiency": (
        "Assess the manufacturing process's efficiency, considering the layout,"
        " logistics, and automation level."
    ),
}

# Define prompts for each agent
health_safety_prompt = tasks["health_safety"]
productivity_prompt = tasks["productivity"]
safety_prompt = tasks["safety"]
security_prompt = tasks["security"]
sustainability_prompt = tasks["sustainability"]
efficiency_prompt = tasks["efficiency"]

# Health and safety agent
health_security_agent = Flow(
    llm=llm,
    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + health_safety_prompt,
    max_loops=2,
)

# Quality control agent
# NOTE(review): defined but never added to the workflow below — confirm
# whether a quality-control step (prompt + image) is still planned.
quality_control_agent = Flow(
    llm=llm,
    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
    max_loops=2,
)

# Productivity agent
productivity_check_agent = Flow(
    llm=llm,
    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + productivity_prompt,
    max_loops=2,
)

# Security agent
security_check_agent = Flow(
    llm=llm,
    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + security_prompt,
    max_loops=2,
)

# Efficiency agent
efficiency_check_agent = Flow(
    llm=llm,
    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + efficiency_prompt,
    max_loops=2,
)

# Sequential workflow
workflow = SequentialWorkflow(
    max_loops=4,
    name="Swarm of multi modal autonomous agents for manufacturing!",
    description="Swarm of multi modal autonomous agents for manufacturing!",
)

# SequentialWorkflow.add() only registers a task and returns None, so its
# return value cannot be fed into the next add() call as the task text.
# Each step therefore gets its own explicit, distinct task description
# (task results are keyed by description).

# Step 1: health and safety check
workflow.add(
    health_security_agent,
    "Analyze the safety of this factory",
    robots,
)

# Step 2: productivity check
workflow.add(
    productivity_check_agent,
    productivity_prompt,
    assembly_line,
)

# Step 3: security check
workflow.add(
    security_check_agent,
    security_prompt,
    red_robots,
)

# Step 4: efficiency check
workflow.add(
    efficiency_check_agent,
    efficiency_prompt,
    tesla_assembly_line,
)

# Run the workflow
workflow.run()

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "swarms"
version = "2.4.3"
version = "2.4.5"
description = "Swarms - Pytorch"
license = "MIT"
authors = ["Kye Gomez <kye@apac.ai>"]

@ -13,6 +13,49 @@ from termcolor import colored
class BaseMultiModalModel:
"""
Base class for multimodal models
Args:
model_name (Optional[str], optional): Model name. Defaults to None.
temperature (Optional[int], optional): Temperature. Defaults to 0.5.
max_tokens (Optional[int], optional): Max tokens. Defaults to 500.
max_workers (Optional[int], optional): Max workers. Defaults to 10.
top_p (Optional[int], optional): Top p. Defaults to 1.
top_k (Optional[int], optional): Top k. Defaults to 50.
beautify (Optional[bool], optional): Beautify. Defaults to False.
device (Optional[str], optional): Device. Defaults to "cuda".
max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500.
retries (Optional[int], optional): Retries. Defaults to 3.
Examples:
>>> from swarms.models.base_multimodal_model import BaseMultiModalModel
>>> model = BaseMultiModalModel()
>>> model.run("Generate a summary of this text")
>>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
>>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.generate_summary("Generate a summary of this text")
>>> model.set_temperature(0.5)
>>> model.set_max_tokens(500)
>>> model.get_generation_time()
>>> model.get_chat_history()
>>> model.get_unique_chat_history()
>>> model.get_chat_history_length()
>>> model.get_unique_chat_history_length()
>>> model.get_chat_history_tokens()
>>> model.print_beautiful("Print this beautifully")
>>> model.stream("Stream this")
>>> model.unique_chat_history()
>>> model.clear_chat_history()
>>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/")
"""
def __init__(
self,
model_name: Optional[str],

@ -1,18 +1,16 @@
import logging
import asyncio
import base64
from typing import Optional
import concurrent.futures
from termcolor import colored
import json
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from typing import List, Tuple
from typing import List, Optional, Tuple
import aiohttp
import requests
from dotenv import load_dotenv
from termcolor import colored
try:
import cv2
@ -94,9 +92,10 @@ class GPT4VisionAPI:
def download_img_then_encode(self, img: str):
"""Download image from URL then encode image to base64 using requests"""
pass
# Function to handle vision tasks
def run(self, task: str, img: str):
def run(self, task: Optional[str] = None, img: Optional[str] = None, *args, **kwargs):
"""Run the model."""
try:
base64_image = self.encode_image(img)
@ -131,6 +130,7 @@ class GPT4VisionAPI:
)
out = response.json()
content = print(out)
content = out["choices"][0]["message"]["content"]
if self.streaming_enabled:
@ -263,6 +263,7 @@ class GPT4VisionAPI:
)
out = response.json()
content = print(out)
content = out["choices"][0]["message"]["content"]
if self.streaming_enabled:
@ -287,6 +288,14 @@ class GPT4VisionAPI:
):
"""
Run the model on multiple tasks and images all at once using concurrent
Args:
tasks (List[str]): List of tasks
imgs (List[str]): List of image paths
Returns:
List[str]: List of responses
"""
# Instantiate the thread pool executor
@ -301,8 +310,8 @@ class GPT4VisionAPI:
async def arun(
self,
task: str,
img: str,
task: Optional[str] = None,
img: Optional[str] = None,
):
"""
Asynchronously run the model

@ -9,9 +9,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
from termcolor import colored
from swarms.tools.tool import BaseTool
from swarms.utils.code_interpreter import SubprocessCodeInterpreter
from swarms.utils.parse_code import extract_code_in_backticks_in_string
from swarms.tools.tool import BaseTool
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
# System prompt
FLOW_SYSTEM_PROMPT = f"""
@ -154,7 +157,7 @@ class Flow:
retry_interval (int): The interval between retry attempts
interactive (bool): Whether or not to run in interactive mode
dashboard (bool): Whether or not to print the dashboard
dynamic_temperature(bool): Dynamical temperature handling
dynamic_temperature_enabled(bool): Dynamical temperature handling
**kwargs (Any): Any additional keyword arguments
Methods:
@ -182,7 +185,6 @@ class Flow:
add_message_to_memory_and_truncate: Add the message to the memory and truncate
print_dashboard: Print dashboard
activate_autonomous_agent: Print the autonomous agent activation message
dynamic_temperature: Dynamically change the temperature
_check_stopping_condition: Check if the stopping condition is met
format_prompt: Format the prompt
get_llm_init_params: Get the llm init params
@ -236,18 +238,20 @@ class Flow:
dynamic_loops: Optional[bool] = False,
interactive: bool = False,
dashboard: bool = False,
agent_name: str = " Autonomous Agent XYZ1B",
agent_name: str = "Autonomous Agent XYZ1B",
agent_description: str = None,
system_prompt: str = FLOW_SYSTEM_PROMPT,
tools: List[BaseTool] = None,
dynamic_temperature: bool = False,
sop: str = None,
dynamic_temperature_enabled: Optional[bool] = False,
sop: Optional[str] = None,
sop_list: Optional[List[str]] = None,
saved_state_path: Optional[str] = "flow_state.json",
autosave: bool = False,
context_length: int = 8192,
autosave: Optional[bool] = False,
context_length: Optional[int] = 8192,
user_name: str = "Human:",
self_healing: bool = False,
self_healing_enabled: bool = False,
code_interpreter: bool = False,
multi_modal: Optional[bool] = None,
**kwargs: Any,
):
self.llm = llm
@ -257,22 +261,17 @@ class Flow:
self.loop_interval = loop_interval
self.retry_attempts = retry_attempts
self.retry_interval = retry_interval
self.feedback = []
self.memory = []
self.task = None
self.stopping_token = stopping_token # or "<DONE>"
self.interactive = interactive
self.dashboard = dashboard
self.return_history = return_history
self.dynamic_temperature = dynamic_temperature
self.dynamic_temperature_enabled = dynamic_temperature_enabled
self.dynamic_loops = dynamic_loops
self.user_name = user_name
self.context_length = context_length
# SOPS to inject into the system prompt
self.sop = sop
# The max_loops will be set dynamically if the dynamic_loop
if self.dynamic_loops:
self.max_loops = "auto"
self.sop_list = sop_list
self.tools = tools or []
self.system_prompt = system_prompt
self.agent_name = agent_name
@ -280,8 +279,27 @@ class Flow:
self.saved_state_path = saved_state_path
self.autosave = autosave
self.response_filters = []
self.self_healing = self_healing
self.self_healing_enabled = self_healing_enabled
self.code_interpreter = code_interpreter
self.multi_modal = multi_modal
# The max_loops will be set dynamically if the dynamic_loop
if self.dynamic_loops:
self.max_loops = "auto"
# If multimodal = yes then set the sop to the multimodal sop
if self.multi_modal:
self.sop = MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1
# If the user inputs a list of strings for the sop then join them and set the sop
if self.sop_list:
self.sop = "\n".join(self.sop_list)
# Memory
self.feedback = []
self.memory = []
# Initialize the code executor
self.code_executor = SubprocessCodeInterpreter()
def provide_feedback(self, feedback: str) -> None:
@ -461,7 +479,7 @@ class Flow:
Retry Interval: {self.retry_interval}
Interactive: {self.interactive}
Dashboard: {self.dashboard}
Dynamic Temperature: {self.dynamic_temperature}
Dynamic Temperature: {self.dynamic_temperature_enabled}
Autosave: {self.autosave}
Saved State: {self.saved_state_path}
Model Configuration: {model_config}
@ -528,10 +546,9 @@ class Flow:
self.print_dashboard(task)
loop_count = 0
# While the max_loops is auto or the loop count is less than the max_loops
while self.max_loops == "auto" or loop_count < self.max_loops:
# Loop count
loop_count += 1
print(
@ -547,7 +564,7 @@ class Flow:
break
# Adjust temperature, comment if no work
if self.dynamic_temperature:
if self.dynamic_temperature_enabled:
self.dynamic_temperature()
# Preparing the prompt
@ -653,7 +670,7 @@ class Flow:
break
# Adjust temperature, comment if no work
if self.dynamic_temperature:
if self.dynamic_temperature_enabled:
self.dynamic_temperature()
# Preparing the prompt
@ -998,7 +1015,7 @@ class Flow:
"retry_interval": self.retry_interval,
"interactive": self.interactive,
"dashboard": self.dashboard,
"dynamic_temperature": self.dynamic_temperature,
"dynamic_temperature": self.dynamic_temperature_enabled,
}
with open(file_path, "w") as f:

@ -29,6 +29,18 @@ class Task:
Task class for running a task in a sequential workflow.
Args:
description (str): The description of the task.
flow (Union[Callable, Flow]): The model or flow to execute the task.
args (List[Any]): Additional arguments to pass to the task execution.
kwargs (Dict[str, Any]): Additional keyword arguments to pass to the task execution.
result (Any): The result of the task execution.
history (List[Any]): The history of the task execution.
Methods:
execute: Execute the task.
Examples:
>>> from swarms.structs import Task, Flow
>>> from swarms.models import OpenAIChat
@ -37,8 +49,6 @@ class Task:
>>> task.execute()
>>> task.result
"""
description: str
@ -54,9 +64,6 @@ class Task:
Raises:
ValueError: If a Flow instance is used as a task and the 'task' argument is not provided.
"""
if isinstance(self.flow, Flow):
# Add a prompt to notify the Flow of the sequential workflow
@ -114,14 +121,20 @@ class SequentialWorkflow:
dashboard: bool = False
def add(
self, task: str, flow: Union[Callable, Flow], *args, **kwargs
self,
flow: Union[Callable, Flow],
task: Optional[str] = None,
img: Optional[str] = None,
*args,
**kwargs,
) -> None:
"""
Add a task to the workflow.
Args:
task (str): The task description or the initial input for the Flow.
flow (Union[Callable, Flow]): The model or flow to execute the task.
task (str): The task description or the initial input for the Flow.
img (str): The image to understand for the task.
*args: Additional arguments to pass to the task execution.
**kwargs: Additional keyword arguments to pass to the task execution.
"""
@ -130,9 +143,22 @@ class SequentialWorkflow:
kwargs["task"] = task # Set the task as a keyword argument for Flow
# Append the task to the tasks list
self.tasks.append(
Task(description=task, flow=flow, args=list(args), kwargs=kwargs)
)
if self.img:
self.tasks.append(
Task(
description=task,
flow=flow,
args=list(args),
kwargs=kwargs,
img=img,
)
)
else:
self.tasks.append(
Task(
description=task, flow=flow, args=list(args), kwargs=kwargs
)
)
def reset_workflow(self) -> None:
"""Resets the workflow by clearing the results of each task."""
@ -148,18 +174,16 @@ class SequentialWorkflow:
"""
return {task.description: task.result for task in self.tasks}
def remove_task(self, task: str) -> None:
    """Remove every task whose description equals ``task``.

    Args:
        task (str): The description of the task(s) to drop from the workflow.
    """
    # The loop variable must not be named `task`: inside the comprehension it
    # would shadow the parameter, the filter would compare each description
    # with its own Task object, and nothing would ever be removed.
    self.tasks = [
        queued for queued in self.tasks if queued.description != task
    ]
def update_task(self, task_description: str, **updates) -> None:
def update_task(self, task: str, **updates) -> None:
"""
Updates the arguments of a task in the workflow.
Args:
task_description (str): The description of the task to update.
task (str): The description of the task to update.
**updates: The updates to apply to the task.
Raises:
@ -178,11 +202,11 @@ class SequentialWorkflow:
"""
# Use a loop variable that does not shadow the `task` parameter; otherwise
# each description is compared with its own Task object, no task ever
# matches, and the ValueError below is raised on every call.
for queued in self.tasks:
    if queued.description == task:
        queued.kwargs.update(updates)
        break
else:
    # Reached only when the loop finished without finding a match.
    raise ValueError(f"Task {task} not found in workflow.")
def save_workflow_state(
self,
@ -272,6 +296,7 @@ class SequentialWorkflow:
)
def workflow_shutdown(self, **kwargs) -> None:
"""Shuts down the workflow."""
print(
colored(
"""
@ -282,6 +307,7 @@ class SequentialWorkflow:
)
def add_objective_to_workflow(self, task: str, **kwargs) -> None:
"""Adds an objective to the workflow."""
print(
colored(
"""

Loading…
Cancel
Save