diff --git a/.env.example b/.env.example
index c6c3cade..6197a6d4 100644
--- a/.env.example
+++ b/.env.example
@@ -5,7 +5,7 @@ AI21_API_KEY="your_api_key_here"
 COHERE_API_KEY="your_api_key_here"
 ALEPHALPHA_API_KEY="your_api_key_here"
 HUGGINFACEHUB_API_KEY="your_api_key_here"
-
+STABILITY_API_KEY="your_api_key_here"
 WOLFRAM_ALPHA_APPID="your_wolfram_alpha_appid_here"
 ZAPIER_NLA_API_KEY="your_zapier_nla_api_key_here"
@@ -41,4 +41,4 @@ REDIS_PORT=
 PINECONE_API_KEY=""
 BING_COOKIE=""
-PSG_CONNECTION_STRING=""
\ No newline at end of file
+PSG_CONNECTION_STRING=""
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index de09fd0b..fab4f817 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -19,9 +19,9 @@ jobs:
         python-version: ["3.7", "3.9", "3.10", "3.11"]

     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 1c969083..e3e74fad 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,4 +1,3 @@
----
 version: 2

 build:
diff --git a/README.md b/README.md
index 2e3adbd4..d60a110a 100644
--- a/README.md
+++ b/README.md
@@ -34,28 +34,35 @@ Run example in Collab:
 gpt4 text -> dalle3 img -> gpt4vision img + text analyze img -> dalle3 img -> loop
+
+"""
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
diff --git a/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg b/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg
new file mode 100644
index 00000000..5e9a0fff
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/main.py b/playground/demos/swarm_of_mma_manufacturing/main.py
new file mode 100644
index 00000000..d3705418
--- /dev/null
+++ b/playground/demos/swarm_of_mma_manufacturing/main.py
@@ -0,0 +1,127 @@
+"""
+Swarm of multi-modal autonomous agents for manufacturing!
+---------------------------------------------------------
+Health Security agent: monitors the health of working conditions. Input: image of the factory. Output: health safety index, 0.0-1.0 (1.0 is highest).
+Quality Control agent: monitors the quality of the product. Input: image of the product. Output: quality index, 0.0-1.0 (1.0 is highest).
+Productivity agent: monitors the productivity of the factory. Input: image of the factory. Output: productivity index, 0.0-1.0 (1.0 is highest).
+Safety agent: monitors the safety of the factory. Input: image of the factory. Output: safety index, 0.0-1.0 (1.0 is highest).
+Security agent: monitors the security of the factory. Input: image of the factory. Output: security index, 0.0-1.0 (1.0 is highest).
+Sustainability agent: monitors the sustainability of the factory. Input: image of the factory. Output: sustainability index, 0.0-1.0 (1.0 is highest).
+Efficiency agent: monitors the efficiency of the factory. Input: image of the factory. Output: efficiency index, 0.0-1.0 (1.0 is highest).
+
+
+Flow:
+health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
+"""
+from swarms.structs import Flow
+import os
+from dotenv import load_dotenv
+from swarms.models import GPT4VisionAPI
+
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+
+
+llm = GPT4VisionAPI(openai_api_key=api_key)
+
+assembly_line = "playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg"
+red_robots = "playground/demos/swarm_of_mma_manufacturing/red_robots.jpg"
+robots = "playground/demos/swarm_of_mma_manufacturing/robots.jpg"
+tesla_assembly_line = "playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg"
+
+
+# Define detailed prompts for each agent
+tasks = {
+    "health_safety": (
+        "Analyze the factory's working environment for health safety. Focus on"
+        " cleanliness, ventilation, spacing between workstations, and personal"
+        " protective equipment availability."
+    ),
+    "productivity": (
+        "Review the factory's workflow efficiency, machine utilization, and"
+        " employee engagement. Identify operational delays or bottlenecks."
+    ),
+    "safety": (
+        "Analyze the factory's safety measures, including fire exits, safety"
+        " signage, and emergency response equipment."
+    ),
+    "security": (
+        "Evaluate the factory's security systems, entry/exit controls, and"
+        " potential vulnerabilities."
+    ),
+    "sustainability": (
+        "Inspect the factory's sustainability practices, including waste"
+        " management, energy usage, and eco-friendly processes."
+    ),
+    "efficiency": (
+        "Assess the manufacturing process's efficiency, considering the layout,"
+        " logistics, and automation level."
+    ),
+}
+
+
+# Define prompts for each agent
+health_safety_prompt = tasks["health_safety"]
+productivity_prompt = tasks["productivity"]
+safety_prompt = tasks["safety"]
+security_prompt = tasks["security"]
+sustainability_prompt = tasks["sustainability"]
+efficiency_prompt = tasks["efficiency"]
+
+
+# Health security agent
+health_security_agent = Flow(
+    llm=llm,
+    sop=health_safety_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Productivity agent
+productivity_check_agent = Flow(
+    llm=llm,
+    sop=productivity_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Security agent
+security_check_agent = Flow(
+    llm=llm,
+    sop=security_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Efficiency agent
+efficiency_check_agent = Flow(
+    llm=llm,
+    sop=efficiency_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+
+# Run the first task on the health_security_agent
+health_check = health_security_agent.run(
+    "Analyze the safety of this factory",
+    robots
+)
+
+# Feed the health check into the productivity_check_agent
+productivity_check = productivity_check_agent.run(
+    health_check, assembly_line
+)
+
+# Feed the productivity check into the security_check_agent
+security_check = security_check_agent.run(
+    productivity_check, red_robots
+)
+
+# Feed the security check into the efficiency_check_agent
+efficiency_check = efficiency_check_agent.run(
+    security_check, tesla_assembly_line
+)
diff --git a/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg b/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg
new file mode 100644
index 00000000..f086fa67
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/robots.jpg b/playground/demos/swarm_of_mma_manufacturing/robots.jpg
new file mode 100644
index 00000000..bddab6e4
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/robots.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg b/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg
new file mode 100644
index 00000000..00456f61
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg differ
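The demo above wires the agents together by hand, passing each agent's output and a fresh image into the next Flow.run call. The same hand-off reads more clearly as a loop; this is a minimal sketch using only the names defined in main.py above:

```python
# Sketch: the sequential hand-off from main.py, written as a loop.
# Assumes the four Flow agents and image paths defined above.
pipeline = [
    (health_security_agent, robots),
    (productivity_check_agent, assembly_line),
    (security_check_agent, red_robots),
    (efficiency_check_agent, tesla_assembly_line),
]

result = "Analyze the safety of this factory"
for agent, image in pipeline:
    # Each agent receives the previous agent's output as its task
    result = agent.run(result, image)
```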
diff --git a/playground/models/stable_diffusion.py b/playground/models/stable_diffusion.py
new file mode 100644
index 00000000..3bb77c39
--- /dev/null
+++ b/playground/models/stable_diffusion.py
@@ -0,0 +1,112 @@
+import os
+import base64
+import requests
+from dotenv import load_dotenv
+from typing import List
+
+load_dotenv()
+
+
+class StableDiffusion:
+    """
+    A class to interact with the Stable Diffusion API for image generation.
+
+    Attributes:
+    -----------
+    api_key : str
+        The API key for accessing the Stable Diffusion API.
+    api_host : str
+        The host URL of the Stable Diffusion API.
+    engine_id : str
+        The ID of the Stable Diffusion engine.
+    headers : dict
+        The headers for the API request.
+    output_dir : str
+        Directory where generated images will be saved.
+
+    Methods:
+    --------
+    generate_image(prompt: str, cfg_scale: int, height: int, width: int, samples: int, steps: int) -> List[str]:
+        Generates images based on a text prompt and returns a list of file paths to the generated images.
+    """
+
+    def __init__(self, api_key: str, api_host: str = "https://api.stability.ai"):
+        """
+        Initializes the StableDiffusion class with the provided API key and host.
+
+        Parameters:
+        -----------
+        api_key : str
+            The API key for accessing the Stable Diffusion API.
+        api_host : str
+            The host URL of the Stable Diffusion API. Default is "https://api.stability.ai".
+        """
+        self.api_key = api_key
+        self.api_host = api_host
+        self.engine_id = "stable-diffusion-v1-6"
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "Accept": "application/json"
+        }
+        self.output_dir = "images"
+        os.makedirs(self.output_dir, exist_ok=True)
+
+    def generate_image(self, prompt: str, cfg_scale: int = 7, height: int = 1024, width: int = 1024, samples: int = 1, steps: int = 30) -> List[str]:
+        """
+        Generates images based on a text prompt.
+
+        Parameters:
+        -----------
+        prompt : str
+            The text prompt based on which the image will be generated.
+        cfg_scale : int
+            CFG scale parameter for image generation. Default is 7.
+        height : int
+            Height of the generated image. Default is 1024.
+        width : int
+            Width of the generated image. Default is 1024.
+        samples : int
+            Number of images to generate. Default is 1.
+        steps : int
+            Number of steps for the generation process. Default is 30.
+
+        Returns:
+        --------
+        List[str]:
+            A list of paths to the generated images.
+
+        Raises:
+        -------
+        Exception:
+            If the API response is not 200 (OK).
+        """
+        response = requests.post(
+            f"{self.api_host}/v1/generation/{self.engine_id}/text-to-image",
+            headers=self.headers,
+            json={
+                "text_prompts": [{"text": prompt}],
+                "cfg_scale": cfg_scale,
+                "height": height,
+                "width": width,
+                "samples": samples,
+                "steps": steps,
+            },
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Non-200 response: {response.text}")
+
+        data = response.json()
+        image_paths = []
+        for i, image in enumerate(data["artifacts"]):
+            image_path = os.path.join(self.output_dir, f"v1_txt2img_{i}.png")
+            with open(image_path, "wb") as f:
+                f.write(base64.b64decode(image["base64"]))
+            image_paths.append(image_path)
+
+        return image_paths
+
+
+# Usage example:
+# sd = StableDiffusion("your-api-key")
+# images = sd.generate_image("A scenic landscape with mountains")
+# print(images)
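A minimal usage sketch for the new StableDiffusion wrapper, assuming the STABILITY_API_KEY entry added to .env.example above is set in the environment:

```python
import os

from dotenv import load_dotenv

# Sketch: load the key from .env (see .env.example) and generate two images.
load_dotenv()
sd = StableDiffusion(api_key=os.getenv("STABILITY_API_KEY"))
paths = sd.generate_image("A scenic landscape with mountains", samples=2)
print(paths)  # e.g. ["images/v1_txt2img_0.png", "images/v1_txt2img_1.png"]
```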
diff --git a/pyproject.toml b/pyproject.toml
index a15d5d83..351442f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "swarms"
-version = "2.4.1"
+version = "2.4.5"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez "]
diff --git a/swarms/__init__.py b/swarms/__init__.py
index d876c04e..9ceb78f2 100644
--- a/swarms/__init__.py
+++ b/swarms/__init__.py
@@ -1,18 +1,6 @@
-import logging
-import os
-import warnings
+from swarms.utils.disable_logging import disable_logging

-warnings.filterwarnings("ignore", category=UserWarning)
-
-# disable tensorflow warnings
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
-
-try:
-    log = logging.getLogger("pytorch")
-    log.propagate = False
-    log.setLevel(logging.ERROR)
-except Exception as error:
-    print(f"Pytorch logging not disabled: {error}")
+disable_logging()

 from swarms.agents import *  # noqa: E402, F403
 from swarms.swarms import *  # noqa: E402, F403
diff --git a/swarms/models/base_multimodal_model.py b/swarms/models/base_multimodal_model.py
index 54eed0ed..73ec66ff 100644
--- a/swarms/models/base_multimodal_model.py
+++ b/swarms/models/base_multimodal_model.py
@@ -1,3 +1,4 @@
+from abc import abstractmethod
 import asyncio
 import base64
 import concurrent.futures
@@ -7,11 +8,54 @@ from io import BytesIO
 from typing import List, Optional, Tuple

 import requests
-from ABC import abstractmethod
 from PIL import Image
+from termcolor import colored


 class BaseMultiModalModel:
+    """
+    Base class for multimodal models
+
+
+    Args:
+        model_name (Optional[str], optional): Model name. Defaults to None.
+        temperature (Optional[int], optional): Temperature. Defaults to 0.5.
+        max_tokens (Optional[int], optional): Max tokens. Defaults to 500.
+        max_workers (Optional[int], optional): Max workers. Defaults to 10.
+        top_p (Optional[int], optional): Top p. Defaults to 1.
+        top_k (Optional[int], optional): Top k. Defaults to 50.
+        beautify (Optional[bool], optional): Beautify. Defaults to False.
+        device (Optional[str], optional): Device. Defaults to "cuda".
+        max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500.
+        retries (Optional[int], optional): Retries. Defaults to 3.
+
+    Examples:
+        >>> from swarms.models.base_multimodal_model import BaseMultiModalModel
+        >>> model = BaseMultiModalModel()
+        >>> model.run("Generate a summary of this text")
+        >>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
+        >>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
+        >>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+        >>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
+        >>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+        >>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
+        >>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+        >>> model.generate_summary("Generate a summary of this text")
+        >>> model.set_temperature(0.5)
+        >>> model.set_max_tokens(500)
+        >>> model.get_generation_time()
+        >>> model.get_chat_history()
+        >>> model.get_unique_chat_history()
+        >>> model.get_chat_history_length()
+        >>> model.get_unique_chat_history_length()
+        >>> model.get_chat_history_tokens()
+        >>> model.print_beautiful("Print this beautifully")
+        >>> model.stream("Stream this")
+        >>> model.unique_chat_history()
+        >>> model.clear_chat_history()
+        >>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/")
+
+    """
     def __init__(
         self,
         model_name: Optional[str],
@@ -20,6 +64,7 @@ class BaseMultiModalModel:
         max_workers: Optional[int] = 10,
         top_p: Optional[int] = 1,
         top_k: Optional[int] = 50,
+        beautify: Optional[bool] = False,
         device: Optional[str] = "cuda",
         max_new_tokens: Optional[int] = 500,
         retries: Optional[int] = 3,
@@ -30,12 +75,12 @@ class BaseMultiModalModel:
         self.max_workers = max_workers
         self.top_p = top_p
         self.top_k = top_k
+        self.beautify = beautify
         self.device = device
         self.max_new_tokens = max_new_tokens
         self.retries = retries
         self.chat_history = []

-    @abstractmethod
     def __call__(self, text: str, img: str):
         """Run the model"""
@@ -59,17 +104,17 @@ class BaseMultiModalModel:
         except requests.RequestException as error:
             print(f"Error fetching image from {img} and error: {error}")
             return None
-    
+
     def encode_img(self, img: str):
         """Encode the image to base64"""
         with open(img, "rb") as image_file:
             return base64.b64encode(image_file.read()).decode("utf-8")
-    
+
     def get_img(self, img: str):
         """Get the image from the path"""
         image_pil = Image.open(img)
         return image_pil
-    
+
     def clear_chat_history(self):
         """Clear the chat history"""
         self.chat_history = []
@@ -85,11 +130,11 @@ class BaseMultiModalModel:
         Args:
             tasks (List[str]): List of tasks
             imgs (List[str]): List of image paths
-    
+
         Returns:
             List[str]: List of responses
-    
-    
+
+
         """
         # Instantiate the thread pool executor
         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
             results = executor.map(self.run, tasks, imgs)

             # Print the results for debugging
             for result in results:
                 print(result)

-
     def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
         """Process a batch of tasks and images"""
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -131,11 +175,11 @@ class BaseMultiModalModel:
                 for task, img in tasks_images
             ]
             return await asyncio.gather(*futures)
-    
+
     def unique_chat_history(self):
         """Get the unique chat history"""
         return list(set(self.chat_history))
-    
+
     def run_with_retries(self, task: str, img: str):
         """Run the model with retries"""
         for i in range(self.retries):
             try:
                 return self.run(task, img)
             except Exception as error:
                 print(f"Error with the request {error}")
                 continue
-    
+
     def run_batch_with_retries(self, tasks_images: List[Tuple[str, str]]):
         """Run the model with retries"""
         for i in range(self.retries):
@@ -186,24 +230,37 @@ class BaseMultiModalModel:
         if self.start_time and self.end_time:
             return self.end_time - self.start_time
         return 0
-    
+
     def get_chat_history(self):
         """Get the chat history"""
         return self.chat_history
-    
+
     def get_unique_chat_history(self):
         """Get the unique chat history"""
         return list(set(self.chat_history))
-    
+
     def get_chat_history_length(self):
         """Get the chat history length"""
         return len(self.chat_history)
-    
+
     def get_unique_chat_history_length(self):
         """Get the unique chat history length"""
         return len(list(set(self.chat_history)))
-    
+
     def get_chat_history_tokens(self):
         """Get the chat history tokens"""
         return self._num_tokens()
-    
\ No newline at end of file
+
+    def print_beautiful(self, content: str, color: str = "cyan"):
+        """Print beautifully with termcolor"""
+        content = colored(content, color)
+        print(content)
+
+    def stream(self, content: str):
+        """Stream the output, one chunk at a time
+
+        Args:
+            content (str): The content to stream
+        """
+        for chunk in content:
+            print(chunk)
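BaseMultiModalModel leaves the actual inference to subclasses. A minimal sketch of a concrete subclass; EchoModel is hypothetical and only exercises the base-class plumbing:

```python
# Sketch: EchoModel is a hypothetical subclass of BaseMultiModalModel.
class EchoModel(BaseMultiModalModel):
    def run(self, task: str, img: str) -> str:
        # Record the task so the chat-history helpers have data to report
        self.chat_history.append(task)
        return f"{task} -> {img}"


model = EchoModel(model_name="echo", beautify=True)
print(model.run("Describe this image", "photo.jpg"))
model.print_beautiful("done")           # colored output via termcolor
print(model.get_chat_history_length())  # 1
```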
diff --git a/swarms/models/gpt4_vision_api.py b/swarms/models/gpt4_vision_api.py
index 8cf9371d..0370b2c2 100644
--- a/swarms/models/gpt4_vision_api.py
+++ b/swarms/models/gpt4_vision_api.py
@@ -1,15 +1,22 @@
 import asyncio
 import base64
 import concurrent.futures
-from termcolor import colored
 import json
+import logging
 import os
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Tuple
+from typing import List, Optional, Tuple

 import aiohttp
 import requests
 from dotenv import load_dotenv
+from termcolor import colored
+
+try:
+    import cv2
+except ImportError:
+    raise ImportError(
+        "OpenCV not installed. Please install OpenCV to use this model."
+    )

 # Load environment variables
 load_dotenv()
@@ -54,16 +61,29 @@ class GPT4VisionAPI:
         self,
         openai_api_key: str = openai_api_key,
         model_name: str = "gpt-4-vision-preview",
+        logging_enabled: bool = False,
         max_workers: int = 10,
         max_tokens: str = 300,
         openai_proxy: str = "https://api.openai.com/v1/chat/completions",
+        beautify: bool = False,
+        streaming_enabled: Optional[bool] = False,
     ):
         super().__init__()
         self.openai_api_key = openai_api_key
+        self.logging_enabled = logging_enabled
         self.model_name = model_name
         self.max_workers = max_workers
         self.max_tokens = max_tokens
         self.openai_proxy = openai_proxy
+        self.beautify = beautify
+        self.streaming_enabled = streaming_enabled
+
+        if self.logging_enabled:
+            logging.basicConfig(level=logging.DEBUG)
+        else:
+            # Disable debug logs for requests and urllib3
+            logging.getLogger("requests").setLevel(logging.WARNING)
+            logging.getLogger("urllib3").setLevel(logging.WARNING)

     def encode_image(self, img: str):
         """Encode image to base64."""
@@ -72,9 +92,10 @@ class GPT4VisionAPI:

     def download_img_then_encode(self, img: str):
         """Download image from URL then encode image to base64 using requests"""
+        pass

     # Function to handle vision tasks
-    def run(self, task: str, img: str):
+    def run(self, task: Optional[str] = None, img: Optional[str] = None, *args, **kwargs):
         """Run the model."""
         try:
             base64_image = self.encode_image(img)
@@ -83,7 +104,7 @@ class GPT4VisionAPI:
                 "Authorization": f"Bearer {openai_api_key}",
             }
             payload = {
-                "model": self.model_name,
+                "model": "gpt-4-vision-preview",
                 "messages": [
                     {
                         "role": "user",
@@ -103,18 +124,109 @@ class GPT4VisionAPI:
                 "max_tokens": self.max_tokens,
             }
             response = requests.post(
-                "https://api.openai.com/v1/chat/completions",
+                self.openai_proxy,
                 headers=headers,
                 json=payload,
             )

             out = response.json()
             content = out["choices"][0]["message"]["content"]
-            print(content)
+
+            if self.streaming_enabled:
+                self.stream_response(content)
+
+            if self.beautify:
+                content = colored(content, "cyan")
+                print(content)
+            else:
+                print(content)
+
         except Exception as error:
             print(f"Error with the request: {error}")
             raise error

+    def video_prompt(self, frames):
+        """
+        Generate a prompt for describing a video from a list of its frames.
+
+        Parameters
+        ----------
+        frames : list
+            A list of base64-encoded frames
+
+        Returns
+        -------
+        PROMPT : str
+            The prompt to send along with the frames
+
+        Examples
+        --------
+
+        >>> from swarms.models import GPT4VisionAPI
+        >>> llm = GPT4VisionAPI()
+        >>> video = "video.mp4"
+        >>> base64_frames = llm.process_video(video)
+        >>> prompt = llm.video_prompt(base64_frames)
+        >>> print(prompt)
+
+        """
+        PROMPT = f"""
+        These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
+
+        {frames}
+        """
+        return PROMPT
+
+    def stream_response(self, content: str):
+        """Stream the response of the output, one chunk at a time
+
+        Args:
+            content (str): The content to stream
+        """
+        for chunk in content:
+            print(chunk)
+
+    def process_video(self, video: str):
+        """
+        Process a video into a list of base64 frames
+
+        Parameters
+        ----------
+        video : str
+            The path to the video file
+
+        Returns
+        -------
+        base64_frames : list
+            A list of base64 frames
+
+        Examples
+        --------
+        >>> from swarms.models import GPT4VisionAPI
+        >>> llm = GPT4VisionAPI()
+        >>> video = "video.mp4"
+        >>> base64_frames = llm.process_video(video)
+
+        """
+        video = cv2.VideoCapture(video)
+
+        base64_frames = []
+        while video.isOpened():
+            success, frame = video.read()
+            if not success:
+                break
+            _, buffer = cv2.imencode(".jpg", frame)
+            base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
+
+        video.release()
+        print(len(base64_frames), "frames read.")
+        return base64_frames
+
     def __call__(self, task: str, img: str):
         """Run the model."""
         try:
@@ -151,11 +263,21 @@ class GPT4VisionAPI:

             out = response.json()
             content = out["choices"][0]["message"]["content"]
-            print(content)
+
+            if self.streaming_enabled:
+                self.stream_response(content)
+
+            if self.beautify:
+                content = colored(content, "cyan")
+                print(content)
+            else:
+                print(content)
+
         except Exception as error:
             print(f"Error with the request: {error}")
             raise error
-    # Function to handle vision tasks

     def run_many(
         self,
@@ -164,6 +286,14 @@ class GPT4VisionAPI:
     ):
         """
         Run the model on multiple tasks and images all at once using concurrent
+
+        Args:
+            tasks (List[str]): List of tasks
+            imgs (List[str]): List of image paths
+
+        Returns:
+            List[str]: List of responses
+
         """
         # Instantiate the thread pool executor
@@ -178,8 +308,8 @@ class GPT4VisionAPI:

     async def arun(
         self,
-        task: str,
-        img: str,
+        task: Optional[str] = None,
+        img: Optional[str] = None,
     ):
         """
         Asynchronously run the model
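A short sketch of the new video helpers on GPT4VisionAPI, assuming a local video.mp4 and a representative frame image (both hypothetical):

```python
# Sketch: turn a video into base64 frames, build a prompt from them,
# then run the model against a representative frame image.
llm = GPT4VisionAPI(beautify=True)

frames = llm.process_video("video.mp4")  # list of base64-encoded JPEG frames
prompt = llm.video_prompt(frames[:10])   # keep the prompt to a few frames
llm.run(prompt, "first_frame.jpg")       # run() still expects an image path
```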
diff --git a/swarms/models/kosmos_two.py b/swarms/models/kosmos_two.py
index 7e9da590..99998287 100644
--- a/swarms/models/kosmos_two.py
+++ b/swarms/models/kosmos_two.py
@@ -24,7 +24,7 @@ class Kosmos:
     ----------
     model_name : str
         Path to the pretrained model
-    
+
     Examples
     --------
     >>> kosmos = Kosmos()
diff --git a/swarms/models/whisperx_model.py b/swarms/models/whisperx_model.py
index 883c3edb..338db6e3 100644
--- a/swarms/models/whisperx_model.py
+++ b/swarms/models/whisperx_model.py
@@ -99,7 +99,9 @@ class WhisperX:
             print("The key 'segments' is not found in the result.")

     def transcribe(self, audio_file):
-        model = whisperx_model.load_model("large-v2", self.device, self.compute_type)
+        model = whisperx_model.load_model(
+            "large-v2", self.device, self.compute_type
+        )

         audio = whisperx_model.load_audio(audio_file)
         result = model.transcribe(audio, batch_size=self.batch_size)
diff --git a/swarms/structs/flow.py b/swarms/structs/flow.py
index 47740f73..19d9a90e 100644
--- a/swarms/structs/flow.py
+++ b/swarms/structs/flow.py
@@ -9,9 +9,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple

 from termcolor import colored

+from swarms.tools.tool import BaseTool
 from swarms.utils.code_interpreter import SubprocessCodeInterpreter
 from swarms.utils.parse_code import extract_code_in_backticks_in_string
-from swarms.tools.tool import BaseTool
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)

 # System prompt
 FLOW_SYSTEM_PROMPT = f"""
@@ -154,7 +157,7 @@ class Flow:
         retry_interval (int): The interval between retry attempts
         interactive (bool): Whether or not to run in interactive mode
         dashboard (bool): Whether or not to print the dashboard
-        dynamic_temperature(bool): Dynamic temperature handling
+        dynamic_temperature_enabled(bool): Dynamic temperature handling
         **kwargs (Any): Any additional keyword arguments

     Methods:
@@ -182,7 +185,6 @@ class Flow:
         add_message_to_memory_and_truncate: Add the message to the memory and truncate
         print_dashboard: Print dashboard
         activate_autonomous_agent: Print the autonomous agent activation message
-        dynamic_temperature: Dynamically change the temperature
         _check_stopping_condition: Check if the stopping condition is met
         format_prompt: Format the prompt
         get_llm_init_params: Get the llm init params
@@ -236,18 +238,20 @@ class Flow:
         dynamic_loops: Optional[bool] = False,
         interactive: bool = False,
         dashboard: bool = False,
-        agent_name: str = " Autonomous Agent XYZ1B",
+        agent_name: str = "Autonomous Agent XYZ1B",
         agent_description: str = None,
         system_prompt: str = FLOW_SYSTEM_PROMPT,
         tools: List[BaseTool] = None,
-        dynamic_temperature: bool = False,
-        sop: str = None,
+        dynamic_temperature_enabled: Optional[bool] = False,
+        sop: Optional[str] = None,
+        sop_list: Optional[List[str]] = None,
         saved_state_path: Optional[str] = "flow_state.json",
-        autosave: bool = False,
-        context_length: int = 8192,
+        autosave: Optional[bool] = False,
+        context_length: Optional[int] = 8192,
         user_name: str = "Human:",
-        self_healing: bool = False,
-        code_interpreter: bool = False,
+        self_healing_enabled: Optional[bool] = False,
+        code_interpreter: Optional[bool] = False,
+        multi_modal: Optional[bool] = None,
         **kwargs: Any,
     ):
         self.llm = llm
@@ -257,22 +261,17 @@ class Flow:
         self.loop_interval = loop_interval
         self.retry_attempts = retry_attempts
         self.retry_interval = retry_interval
-        self.feedback = []
-        self.memory = []
         self.task = None
         self.stopping_token = stopping_token  # or ""
         self.interactive = interactive
         self.dashboard = dashboard
         self.return_history = return_history
-        self.dynamic_temperature = dynamic_temperature
+        self.dynamic_temperature_enabled = dynamic_temperature_enabled
         self.dynamic_loops = dynamic_loops
         self.user_name = user_name
         self.context_length = context_length
-        # SOPS to inject into the system prompt
         self.sop = sop
-        # The max_loops will be set dynamically if the dynamic_loop
-        if self.dynamic_loops:
-            self.max_loops = "auto"
+        self.sop_list = sop_list
         self.tools = tools or []
         self.system_prompt = system_prompt
         self.agent_name = agent_name
@@ -280,8 +279,27 @@ class Flow:
         self.saved_state_path = saved_state_path
         self.autosave = autosave
         self.response_filters = []
-        self.self_healing = self_healing
+        self.self_healing_enabled = self_healing_enabled
         self.code_interpreter = code_interpreter
+        self.multi_modal = multi_modal
+
+        # max_loops is set dynamically when dynamic_loops is enabled
+        if self.dynamic_loops:
+            self.max_loops = "auto"
+
+        # If multi_modal is enabled, use the multi-modal SOP
+        if self.multi_modal:
+            self.sop = MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1
+
+        # If the user passes a list of SOP strings, join them into one SOP
+        if self.sop_list:
+            self.sop = "\n".join(self.sop_list)
+
+        # Memory
+        self.feedback = []
+        self.memory = []
+
+        # Initialize the code executor
+        self.code_executor = SubprocessCodeInterpreter()

     def provide_feedback(self, feedback: str) -> None:
@@ -461,7 +479,7 @@ class Flow:
         Retry Interval: {self.retry_interval}
         Interactive: {self.interactive}
         Dashboard: {self.dashboard}
-        Dynamic Temperature: {self.dynamic_temperature}
+        Dynamic Temperature: {self.dynamic_temperature_enabled}
         Autosave: {self.autosave}
         Saved State: {self.saved_state_path}
         Model Configuration: {model_config}
@@ -498,7 +516,7 @@ class Flow:
             )
             print(error)

-    def run(self, task: str, img: Optional[str], **kwargs):
+    def run(self, task: Optional[str], img: Optional[str] = None, **kwargs):
         """
         Run the autonomous agent loop
@@ -528,7 +546,10 @@ class Flow:
                 self.print_dashboard(task)

             loop_count = 0
+
+            # Loop while max_loops is "auto" or the loop count is below max_loops
             while self.max_loops == "auto" or loop_count < self.max_loops:
+                # Count this loop
                 loop_count += 1
                 print(
                     colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")
@@ -543,7 +564,7 @@ class Flow:
                         break

                 # Adjust temperature, comment if no work
-                if self.dynamic_temperature:
+                if self.dynamic_temperature_enabled:
                     self.dynamic_temperature()

                 # Preparing the prompt
@@ -649,7 +670,7 @@ class Flow:
                     break

                 # Adjust temperature, comment if no work
-                if self.dynamic_temperature:
+                if self.dynamic_temperature_enabled:
                     self.dynamic_temperature()

                 # Preparing the prompt
@@ -994,7 +1015,7 @@ class Flow:
             "retry_interval": self.retry_interval,
             "interactive": self.interactive,
             "dashboard": self.dashboard,
-            "dynamic_temperature": self.dynamic_temperature,
+            "dynamic_temperature": self.dynamic_temperature_enabled,
         }

         with open(file_path, "w") as f:
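A minimal sketch of the new multi_modal flag on Flow; when it is set, Flow.__init__ swaps the SOP for MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1. The llm and image path here are assumptions:

```python
# Sketch: a multi-modal Flow backed by GPT4VisionAPI.
flow = Flow(
    llm=GPT4VisionAPI(),
    max_loops=1,
    multi_modal=True,  # injects MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 as the SOP
    dynamic_temperature_enabled=False,
)
out = flow.run("Inspect this factory floor for hazards", "factory.jpg")
```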
""" @@ -130,9 +143,22 @@ class SequentialWorkflow: kwargs["task"] = task # Set the task as a keyword argument for Flow # Append the task to the tasks list - self.tasks.append( - Task(description=task, flow=flow, args=list(args), kwargs=kwargs) - ) + if self.img: + self.tasks.append( + Task( + description=task, + flow=flow, + args=list(args), + kwargs=kwargs, + img=img, + ) + ) + else: + self.tasks.append( + Task( + description=task, flow=flow, args=list(args), kwargs=kwargs + ) + ) def reset_workflow(self) -> None: """Resets the workflow by clearing the results of each task.""" @@ -148,18 +174,16 @@ class SequentialWorkflow: """ return {task.description: task.result for task in self.tasks} - def remove_task(self, task_description: str) -> None: + def remove_task(self, task: str) -> None: """Remove tasks from sequential workflow""" - self.tasks = [ - task for task in self.tasks if task.description != task_description - ] + self.tasks = [task for task in self.tasks if task.description != task] - def update_task(self, task_description: str, **updates) -> None: + def update_task(self, task: str, **updates) -> None: """ Updates the arguments of a task in the workflow. Args: - task_description (str): The description of the task to update. + task (str): The description of the task to update. **updates: The updates to apply to the task. Raises: @@ -178,11 +202,11 @@ class SequentialWorkflow: """ for task in self.tasks: - if task.description == task_description: + if task.description == task: task.kwargs.update(updates) break else: - raise ValueError(f"Task {task_description} not found in workflow.") + raise ValueError(f"Task {task} not found in workflow.") def save_workflow_state( self, @@ -272,6 +296,7 @@ class SequentialWorkflow: ) def workflow_shutdown(self, **kwargs) -> None: + """Shuts down the workflow.""" print( colored( """ @@ -282,6 +307,7 @@ class SequentialWorkflow: ) def add_objective_to_workflow(self, task: str, **kwargs) -> None: + """Adds an objective to the workflow.""" print( colored( """ diff --git a/swarms/utils/disable_logging.py b/swarms/utils/disable_logging.py new file mode 100644 index 00000000..d1c7df9b --- /dev/null +++ b/swarms/utils/disable_logging.py @@ -0,0 +1,30 @@ +import logging +import os +import warnings + + +def disable_logging(): + warnings.filterwarnings("ignore", category=UserWarning) + + # disable tensorflow warnings + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + + # Set the logging level for the entire module + logging.basicConfig(level=logging.WARNING) + + try: + log = logging.getLogger("pytorch") + log.propagate = False + log.setLevel(logging.ERROR) + except Exception as error: + print(f"Pytorch logging not disabled: {error}") + + for logger_name in [ + "tensorflow", + "h5py", + "numexpr", + "git", + "wandb.docker.auth", + ]: + logger = logging.getLogger(logger_name) + logger.setLevel(logging.WARNING) # Supress DEBUG and info logs