diff --git a/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg b/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg
new file mode 100644
index 00000000..5e9a0fff
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/flow_iter.py b/playground/demos/swarm_of_mma_manufacturing/flow_iter.py
new file mode 100644
index 00000000..57a33535
--- /dev/null
+++ b/playground/demos/swarm_of_mma_manufacturing/flow_iter.py
@@ -0,0 +1,129 @@
+"""
+Swarm of multi modal autonomous agents for manufacturing!
+---------------------------------------------------------
+Health Security agent: monitors the health of working conditions. Input: image of the factory. Output: health safety index, 0.0 - 1.0 (1.0 being the highest).
+Quality Control agent: monitors the quality of the product. Input: image of the product. Output: quality index, 0.0 - 1.0 (1.0 being the highest).
+Productivity agent: monitors the productivity of the factory. Input: image of the factory. Output: productivity index, 0.0 - 1.0 (1.0 being the highest).
+Safety agent: monitors the safety of the factory. Input: image of the factory. Output: safety index, 0.0 - 1.0 (1.0 being the highest).
+Security agent: monitors the security of the factory. Input: image of the factory. Output: security index, 0.0 - 1.0 (1.0 being the highest).
+Sustainability agent: monitors the sustainability of the factory. Input: image of the factory. Output: sustainability index, 0.0 - 1.0 (1.0 being the highest).
+Efficiency agent: monitors the efficiency of the factory. Input: image of the factory. Output: efficiency index, 0.0 - 1.0 (1.0 being the highest).
+
+
+Flow:
+health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
+"""
+from swarms.structs import Flow, SequentialWorkflow
+import os
+from dotenv import load_dotenv
+from swarms.models import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
+
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+
+llm = GPT4VisionAPI(
+    openai_api_key=api_key
+)
+
+assembly_line = "playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg"
+red_robots = "playground/demos/swarm_of_mma_manufacturing/red_robots.jpg"
+robots = "playground/demos/swarm_of_mma_manufacturing/robots.jpg"
+tesla_assembly_line = "playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg"
+
+
+# Define detailed prompts for each agent
+tasks = {
+    "health_safety": (
+        "Analyze the factory's working environment for health safety. Focus on"
+        " cleanliness, ventilation, spacing between workstations, and personal"
+        " protective equipment availability."
+    ),
+    "productivity": (
+        "Review the factory's workflow efficiency, machine utilization, and"
+        " employee engagement. Identify operational delays or bottlenecks."
+    ),
+    "safety": (
+        "Analyze the factory's safety measures, including fire exits, safety"
+        " signage, and emergency response equipment."
+    ),
+    "security": (
+        "Evaluate the factory's security systems, entry/exit controls, and"
+        " potential vulnerabilities."
+    ),
+    "sustainability": (
+        "Inspect the factory's sustainability practices, including waste"
+        " management, energy usage, and eco-friendly processes."
+    ),
+    "efficiency": (
+        "Assess the manufacturing process's efficiency, considering the layout,"
+        " logistics, and automation level."
+    ),
+}
+
+
+# Define prompts for each agent
+health_safety_prompt = tasks["health_safety"]
+productivity_prompt = tasks["productivity"]
+safety_prompt = tasks["safety"]
+security_prompt = tasks["security"]
+sustainability_prompt = tasks["sustainability"]
+efficiency_prompt = tasks["efficiency"]
+
+
+# Health security agent
+health_security_agent = Flow(
+    llm=llm,
+    sop=health_safety_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Productivity check agent
+productivity_check_agent = Flow(
+    llm=llm,
+    sop=productivity_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Security check agent
+security_check_agent = Flow(
+    llm=llm,
+    sop=security_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Efficiency check agent
+efficiency_check_agent = Flow(
+    llm=llm,
+    sop=efficiency_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+
+# Run the health security agent on the factory image
+health_check = health_security_agent.run(
+    "Analyze the safety of this factory",
+    robots
+)
+
+# Feed the health check into the productivity check agent
+productivity_check = productivity_check_agent.run(
+    health_check, assembly_line
+)
+
+# Feed the productivity check into the security check agent
+security_check = security_check_agent.run(
+    productivity_check, red_robots
+)
+
+# Feed the security check into the efficiency check agent
+efficiency_check = efficiency_check_agent.run(
+    security_check, tesla_assembly_line
+)
+
diff --git a/playground/demos/swarm_of_mma_manufacturing/main.py b/playground/demos/swarm_of_mma_manufacturing/main.py
index ebb00768..88b82c62 100644
--- a/playground/demos/swarm_of_mma_manufacturing/main.py
+++ b/playground/demos/swarm_of_mma_manufacturing/main.py
@@ -13,3 +13,125 @@ Efficiency agent: Agent that monitors the efficiency of the factory: input image
 Flow:
 health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
 """
+from swarms.structs import Flow, SequentialWorkflow
+from swarms.models import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
+
+
+llm = GPT4VisionAPI()
+
+assembly_line = "assembly_line.jpg"
+red_robots = "red_robots.jpg"
+robots = "robots.jpg"
+tesla_assembly_line = "tesla_assembly.jpg"
+
+
+# Define detailed prompts for each agent
+tasks = {
+    "health_safety": (
+        "Analyze the factory's working environment for health safety. Focus on"
+        " cleanliness, ventilation, spacing between workstations, and personal"
+        " protective equipment availability."
+    ),
+    "productivity": (
+        "Review the factory's workflow efficiency, machine utilization, and"
+        " employee engagement. Identify operational delays or bottlenecks."
+    ),
+    "safety": (
+        "Analyze the factory's safety measures, including fire exits, safety"
+        " signage, and emergency response equipment."
+    ),
+    "security": (
+        "Evaluate the factory's security systems, entry/exit controls, and"
+        " potential vulnerabilities."
+    ),
+    "sustainability": (
+        "Inspect the factory's sustainability practices, including waste"
+        " management, energy usage, and eco-friendly processes."
+    ),
+    "efficiency": (
+        "Assess the manufacturing process's efficiency, considering the layout,"
+        " logistics, and automation level."
+    ),
+}
+
+
+# Define prompts for each agent
+health_safety_prompt = tasks["health_safety"]
+productivity_prompt = tasks["productivity"]
+safety_prompt = tasks["safety"]
+security_prompt = tasks["security"]
+sustainability_prompt = tasks["sustainability"]
+efficiency_prompt = tasks["efficiency"]
+
+
+# Health security agent
+health_security_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + health_safety_prompt,
+    max_loops=2,
+)
+
+# Quality control agent
+quality_control_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+    max_loops=2,
+)
+
+# Productivity check agent
+productivity_check_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + productivity_prompt,
+    max_loops=2,
+)
+
+# Security check agent
+security_check_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + security_prompt,
+    max_loops=2,
+)
+
+# Efficiency check agent
+efficiency_check_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + efficiency_prompt,
+    max_loops=2,
+)
+
+
+# Sequential workflow
+workflow = SequentialWorkflow(
+    max_loops=4,
+    name="Swarm of multi modal autonomous agents for manufacturing!",
+    description="Swarm of multi modal autonomous agents for manufacturing!",
+)
+
+# Add the first task to the health_security_agent
+health_check = workflow.add(
+    health_security_agent,
+    "Analyze the safety of this factory",
+    robots
+)
+
+# Add the second task to the productivity_check_agent
+productivity_check = workflow.add(
+    productivity_check_agent, health_check, assembly_line
+)
+
+# Add the third task to the security_check_agent
+security_check = workflow.add(
+    security_check_agent, productivity_check, red_robots
+)
+
+# Add the fourth task to the efficiency_check_agent
+efficiency_check = workflow.add(
+    efficiency_check_agent, security_check, tesla_assembly_line
+)
+
+
+# Run the workflow
+workflow.run()
diff --git a/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg b/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg
new file mode 100644
index 00000000..f086fa67
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/robots.jpg b/playground/demos/swarm_of_mma_manufacturing/robots.jpg
new file mode 100644
index 00000000..bddab6e4
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/robots.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg b/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg
new file mode 100644
index 00000000..00456f61
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg differ
diff --git a/pyproject.toml b/pyproject.toml
index 0203391c..9854c5c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "2.4.3"
+version = "2.4.5"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez "]
diff --git a/swarms/models/base_multimodal_model.py b/swarms/models/base_multimodal_model.py
index e5671917..73ec66ff 100644
--- a/swarms/models/base_multimodal_model.py
+++ b/swarms/models/base_multimodal_model.py
@@ -13,6 +13,49 @@ from termcolor import colored
 
 
 class BaseMultiModalModel:
+    """
+    Base class for multimodal models
+
+
+    Args:
+        model_name (Optional[str], optional): Model name. Defaults to None.
+        temperature (Optional[int], optional): Temperature. Defaults to 0.5.
+        max_tokens (Optional[int], optional): Max tokens. Defaults to 500.
+        max_workers (Optional[int], optional): Max workers. Defaults to 10.
+        top_p (Optional[int], optional): Top p. Defaults to 1.
+        top_k (Optional[int], optional): Top k. Defaults to 50.
+        beautify (Optional[bool], optional): Beautify. Defaults to False.
+        device (Optional[str], optional): Device. Defaults to "cuda".
+        max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500.
+        retries (Optional[int], optional): Retries. Defaults to 3.
+
+    Examples:
+    >>> from swarms.models.base_multimodal_model import BaseMultiModalModel
+    >>> model = BaseMultiModalModel()
+    >>> model.run("Generate a summary of this text")
+    >>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
+    >>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
+    >>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+    >>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
+    >>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+    >>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
+    >>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+    >>> model.generate_summary("Generate a summary of this text")
+    >>> model.set_temperature(0.5)
+    >>> model.set_max_tokens(500)
+    >>> model.get_generation_time()
+    >>> model.get_chat_history()
+    >>> model.get_unique_chat_history()
+    >>> model.get_chat_history_length()
+    >>> model.get_unique_chat_history_length()
+    >>> model.get_chat_history_tokens()
+    >>> model.print_beautiful("Print this beautifully")
+    >>> model.stream("Stream this")
+    >>> model.unique_chat_history()
+    >>> model.clear_chat_history()
+    >>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/")
+
+    """
     def __init__(
         self,
         model_name: Optional[str],
diff --git a/swarms/models/gpt4_vision_api.py b/swarms/models/gpt4_vision_api.py
index 869cde1a..6441140e 100644
--- a/swarms/models/gpt4_vision_api.py
+++ b/swarms/models/gpt4_vision_api.py
@@ -1,18 +1,16 @@
-import logging
 import asyncio
 import base64
-from typing import Optional
 import concurrent.futures
-from termcolor import colored
 import json
+import logging
 import os
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 import aiohttp
 import requests
 from dotenv import load_dotenv
-
+from termcolor import colored
 
 try:
     import cv2
@@ -94,9 +92,10 @@ class GPT4VisionAPI:
 
     def download_img_then_encode(self, img: str):
        """Download image from URL then encode image to base64 using requests"""
+        pass
 
     # Function to handle vision tasks
-    def run(self, task: str, img: str):
+    def run(self, task: Optional[str] = None, img: Optional[str] = None, *args, **kwargs):
         """Run the model."""
         try:
             base64_image = self.encode_image(img)
@@ -131,6 +130,7 @@ class GPT4VisionAPI:
             )
 
             out = response.json()
+            print(out)
             content = out["choices"][0]["message"]["content"]
 
             if self.streaming_enabled:
@@ -263,6 +263,7 @@ class GPT4VisionAPI:
             )
 
             out = response.json()
+            print(out)
             content = out["choices"][0]["message"]["content"]
 
             if self.streaming_enabled:
@@ -287,6 +288,14 @@ class GPT4VisionAPI:
     ):
         """
         Run the model on multiple tasks and images all at once using concurrent
+
+        Args:
+            tasks (List[str]): List of tasks
+            imgs (List[str]): List of image paths
+
+        Returns:
+            List[str]: List of responses
+
         """
 
         # Instantiate the thread pool executor
@@ -301,8 +310,8 @@ class GPT4VisionAPI:
 
     async def arun(
         self,
-        task: str,
-        img: str,
+        task: Optional[str] = None,
+        img: Optional[str] = None,
     ):
         """
         Asynchronously run the model
diff --git a/swarms/structs/flow.py b/swarms/structs/flow.py
index e0079199..5084c82e 100644
--- a/swarms/structs/flow.py
+++ b/swarms/structs/flow.py
@@ -9,9 +9,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
 
 from termcolor import colored
 
+from swarms.tools.tool import BaseTool
 from swarms.utils.code_interpreter import SubprocessCodeInterpreter
 from swarms.utils.parse_code import extract_code_in_backticks_in_string
-from swarms.tools.tool import BaseTool
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
 
 # System prompt
 FLOW_SYSTEM_PROMPT = f"""
@@ -154,7 +157,7 @@ class Flow:
         retry_interval (int): The interval between retry attempts
         interactive (bool): Whether or not to run in interactive mode
         dashboard (bool): Whether or not to print the dashboard
-        dynamic_temperature(bool): Dynamical temperature handling
+        dynamic_temperature_enabled (bool): Dynamic temperature handling
         **kwargs (Any): Any additional keyword arguments
 
     Methods:
@@ -182,7 +185,6 @@ class Flow:
         add_message_to_memory_and_truncate: Add the message to the memory and truncate
         print_dashboard: Print dashboard
         activate_autonomous_agent: Print the autonomous agent activation message
-        dynamic_temperature: Dynamically change the temperature
         _check_stopping_condition: Check if the stopping condition is met
         format_prompt: Format the prompt
         get_llm_init_params: Get the llm init params
@@ -236,18 +238,20 @@ class Flow:
         dynamic_loops: Optional[bool] = False,
         interactive: bool = False,
         dashboard: bool = False,
-        agent_name: str = " Autonomous Agent XYZ1B",
+        agent_name: str = "Autonomous Agent XYZ1B",
         agent_description: str = None,
         system_prompt: str = FLOW_SYSTEM_PROMPT,
         tools: List[BaseTool] = None,
-        dynamic_temperature: bool = False,
-        sop: str = None,
+        dynamic_temperature_enabled: Optional[bool] = False,
+        sop: Optional[str] = None,
+        sop_list: Optional[List[str]] = None,
         saved_state_path: Optional[str] = "flow_state.json",
-        autosave: bool = False,
-        context_length: int = 8192,
+        autosave: Optional[bool] = False,
+        context_length: Optional[int] = 8192,
         user_name: str = "Human:",
-        self_healing: bool = False,
+        self_healing_enabled: bool = False,
         code_interpreter: bool = False,
+        multi_modal: Optional[bool] = None,
         **kwargs: Any,
     ):
         self.llm = llm
@@ -257,22 +261,17 @@ class Flow:
         self.loop_interval = loop_interval
         self.retry_attempts = retry_attempts
         self.retry_interval = retry_interval
-        self.feedback = []
-        self.memory = []
         self.task = None
         self.stopping_token = stopping_token  # or ""
         self.interactive = interactive
         self.dashboard = dashboard
         self.return_history = return_history
-        self.dynamic_temperature = dynamic_temperature
+        self.dynamic_temperature_enabled = dynamic_temperature_enabled
         self.dynamic_loops = dynamic_loops
         self.user_name = user_name
         self.context_length = context_length
-        # SOPS to inject into the system prompt
         self.sop = sop
-        # The max_loops will be set dynamically if the dynamic_loop
-        if self.dynamic_loops:
-            self.max_loops = "auto"
+        self.sop_list = sop_list
         self.tools = tools or []
         self.system_prompt = system_prompt
         self.agent_name = agent_name
@@ -280,8 +279,27 @@ class Flow:
         self.saved_state_path = saved_state_path
         self.autosave = autosave
         self.response_filters = []
-        self.self_healing = self_healing
+        self.self_healing_enabled = self_healing_enabled
         self.code_interpreter = code_interpreter
+        self.multi_modal = multi_modal
+
+        # The max_loops will be set to "auto" if dynamic_loops is enabled
+        if self.dynamic_loops:
+            self.max_loops = "auto"
+
+        # If multi_modal is enabled, use the multimodal system prompt as the sop
+        if self.multi_modal:
+            self.sop = MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1
+
+        # If the user passes a list of strings for the sop, join them into a single sop
+        if self.sop_list:
+            self.sop = "\n".join(self.sop_list)
+
+        # Memory
+        self.feedback = []
+        self.memory = []
+
+        # Initialize the code executor
         self.code_executor = SubprocessCodeInterpreter()
 
     def provide_feedback(self, feedback: str) -> None:
@@ -461,7 +479,7 @@ class Flow:
             Retry Interval: {self.retry_interval}
             Interactive: {self.interactive}
             Dashboard: {self.dashboard}
-            Dynamic Temperature: {self.dynamic_temperature}
+            Dynamic Temperature: {self.dynamic_temperature_enabled}
             Autosave: {self.autosave}
             Saved State: {self.saved_state_path}
             Model Configuration: {model_config}
@@ -528,10 +546,9 @@ class Flow:
 
             self.print_dashboard(task)
 
         loop_count = 0
-
+        # While the max_loops is auto or the loop count is less than the max_loops
         while self.max_loops == "auto" or loop_count < self.max_loops:
-            # Loop count
             loop_count += 1
 
             print(
@@ -547,7 +564,7 @@ class Flow:
                 break
 
             # Adjust temperature, comment if no work
-            if self.dynamic_temperature:
+            if self.dynamic_temperature_enabled:
                 self.dynamic_temperature()
 
             # Preparing the prompt
@@ -653,7 +670,7 @@ class Flow:
                 break
 
             # Adjust temperature, comment if no work
-            if self.dynamic_temperature:
+            if self.dynamic_temperature_enabled:
                 self.dynamic_temperature()
 
             # Preparing the prompt
@@ -998,7 +1015,7 @@ class Flow:
             "retry_interval": self.retry_interval,
             "interactive": self.interactive,
             "dashboard": self.dashboard,
-            "dynamic_temperature": self.dynamic_temperature,
+            "dynamic_temperature": self.dynamic_temperature_enabled,
         }
 
         with open(file_path, "w") as f:
diff --git a/swarms/structs/sequential_workflow.py b/swarms/structs/sequential_workflow.py
index 753ada15..0f99a247 100644
--- a/swarms/structs/sequential_workflow.py
+++ b/swarms/structs/sequential_workflow.py
@@ -29,6 +29,18 @@ class Task:
 
     Task class for running a task in a sequential workflow.
 
+    Args:
+        description (str): The description of the task.
+        flow (Union[Callable, Flow]): The model or flow to execute the task.
+        args (List[Any]): Additional arguments to pass to the task execution.
+        kwargs (Dict[str, Any]): Additional keyword arguments to pass to the task execution.
+        result (Any): The result of the task execution.
+        history (List[Any]): The history of the task execution.
+
+    Methods:
+        execute: Execute the task.
+
+    Examples:
     >>> from swarms.structs import Task, Flow
     >>> from swarms.models import OpenAIChat
     >>> flow = Flow(llm=llm, max_loops=1, dashboard=False)
     >>> task = Task(description="What's the weather in miami", flow=flow)
     >>> task.execute()
     >>> task.result
 
-
-
     """
 
     description: str
     flow: Union[Callable, Flow]
     args: List[Any] = field(default_factory=list)
     kwargs: Dict[str, Any] = field(default_factory=dict)
     result: Any = None
     history: List[Any] = field(default_factory=list)
 
     def execute(self):
         """
         Execute the task.
 
         Raises:
             ValueError: If a Flow instance is used as a task and the 'task' argument is not provided.
-
-
-
         """
         if isinstance(self.flow, Flow):
             # Add a prompt to notify the Flow of the sequential workflow
@@ -114,14 +121,20 @@ class SequentialWorkflow:
     dashboard: bool = False
 
     def add(
-        self, task: str, flow: Union[Callable, Flow], *args, **kwargs
+        self,
+        flow: Union[Callable, Flow],
+        task: Optional[str] = None,
+        img: Optional[str] = None,
+        *args,
+        **kwargs,
     ) -> None:
         """
         Add a task to the workflow.
 
         Args:
-            task (str): The task description or the initial input for the Flow.
             flow (Union[Callable, Flow]): The model or flow to execute the task.
+            task (str): The task description or the initial input for the Flow.
+            img (str): The image to analyze for the task.
             *args: Additional arguments to pass to the task execution.
             **kwargs: Additional keyword arguments to pass to the task execution.
         """
@@ -130,9 +143,22 @@ class SequentialWorkflow:
             kwargs["task"] = task  # Set the task as a keyword argument for Flow
 
         # Append the task to the tasks list
-        self.tasks.append(
-            Task(description=task, flow=flow, args=list(args), kwargs=kwargs)
-        )
+        if img:
+            self.tasks.append(
+                Task(
+                    description=task,
+                    flow=flow,
+                    args=list(args),
+                    kwargs=kwargs,
+                    img=img,
+                )
+            )
+        else:
+            self.tasks.append(
+                Task(
+                    description=task, flow=flow, args=list(args), kwargs=kwargs
+                )
+            )
 
     def reset_workflow(self) -> None:
         """Resets the workflow by clearing the results of each task."""
@@ -148,18 +174,16 @@ class SequentialWorkflow:
         """
         return {task.description: task.result for task in self.tasks}
 
-    def remove_task(self, task_description: str) -> None:
+    def remove_task(self, task: str) -> None:
         """Remove tasks from sequential workflow"""
-        self.tasks = [
-            task for task in self.tasks if task.description != task_description
-        ]
+        self.tasks = [t for t in self.tasks if t.description != task]
 
-    def update_task(self, task_description: str, **updates) -> None:
+    def update_task(self, task: str, **updates) -> None:
         """
         Updates the arguments of a task in the workflow.
 
         Args:
-            task_description (str): The description of the task to update.
+            task (str): The description of the task to update.
             **updates: The updates to apply to the task.
 
         Raises:
@@ -178,11 +202,11 @@ class SequentialWorkflow:
         """
         for task in self.tasks:
-            if task.description == task_description:
+            if task.description == task:
                 task.kwargs.update(updates)
                 break
         else:
-            raise ValueError(f"Task {task_description} not found in workflow.")
+            raise ValueError(f"Task {task} not found in workflow.")
 
     def save_workflow_state(
         self,
@@ -272,6 +296,7 @@ class SequentialWorkflow:
         )
 
     def workflow_shutdown(self, **kwargs) -> None:
+        """Shuts down the workflow."""
         print(
             colored(
                 """
@@ -282,6 +307,7 @@ class SequentialWorkflow:
         )
 
     def add_objective_to_workflow(self, task: str, **kwargs) -> None:
+        """Adds an objective to the workflow."""
         print(
             colored(
                 """