From e8681b223c50ee83a6d141bdd869d1a6457a6493 Mon Sep 17 00:00:00 2001
From: Kye
Date: Wed, 31 Jan 2024 17:59:19 -0500
Subject: [PATCH] [CLEANUP]

---
 example.py                                    |   4 +-
 playground/demos/visuo/text_to_sql_agent.py   |  33 +++
 playground/models/roboflow_example.py         |  13 +
 playground/{swarms => structs}/chat.py        |   0
 playground/{swarms => structs}/debate.py      |   0
 .../{swarms => structs}/dialogue_simulator.py |   0
 .../{swarms => structs}/easy_example.py       |   0
 playground/{swarms => structs}/godmode.py     |   0
 playground/{swarms => structs}/groupchat.py   |   0
 playground/{swarms => structs}/gui_app.py     |   0
 .../{swarms => structs}/multi_agent_collab.py |   0
 .../{swarms => structs}/multi_agent_debate.py |   0
 playground/{swarms => structs}/orchestrate.py |   0
 .../{swarms => structs}/orchestrator.py       |   0
 playground/{swarms => structs}/social_app.py  |   0
 .../{swarms => structs}/swarms_example.py     |   0
 playground/{swarms => structs}/todo_app.py    |   0
 playground/swarms/autoscaler.py               |   7 -
 ...orkflow.py => workflow_example_example.py} |   0
 pyproject.toml                                |   3 +-
 requirements.txt                              |   5 +-
 scripts/playground_to_examples.sh             |  13 +
 swarms/models/__init__.py                     |   4 +
 swarms/models/gpt4_sam.py                     |  81 ++++++
 swarms/models/odin.py                         |   2 +-
 swarms/models/roboflow_model.py               |  64 +++++
 swarms/models/sam_supervision.py              | 116 ++++++++
 swarms/models/ultralytics_model.py            |  27 +-
 swarms/structs/agent.py                       |  49 ++--
 swarms/structs/conversation.py                |  46 ++++
 swarms/utils/__init__.py                      |  18 ++
 swarms/utils/download_img.py                  |  31 +++
 swarms/utils/logger.py                        |  34 +++
 swarms/utils/supervision_masking.py           | 259 ++++++++++++++++++
 swarms/utils/supervision_visualizer.py        |  85 ++++++
 35 files changed, 858 insertions(+), 36 deletions(-)
 create mode 100644 playground/demos/visuo/text_to_sql_agent.py
 create mode 100644 playground/models/roboflow_example.py
 rename playground/{swarms => structs}/chat.py (100%)
 rename playground/{swarms => structs}/debate.py (100%)
 rename playground/{swarms => structs}/dialogue_simulator.py (100%)
 rename playground/{swarms => structs}/easy_example.py (100%)
 rename playground/{swarms => structs}/godmode.py (100%)
 rename playground/{swarms => structs}/groupchat.py (100%)
 rename playground/{swarms => structs}/gui_app.py (100%)
 rename playground/{swarms => structs}/multi_agent_collab.py (100%)
 rename playground/{swarms => structs}/multi_agent_debate.py (100%)
 rename playground/{swarms => structs}/orchestrate.py (100%)
 rename playground/{swarms => structs}/orchestrator.py (100%)
 rename playground/{swarms => structs}/social_app.py (100%)
 rename playground/{swarms => structs}/swarms_example.py (100%)
 rename playground/{swarms => structs}/todo_app.py (100%)
 delete mode 100644 playground/swarms/autoscaler.py
 rename playground/{workflow.py => workflow_example_example.py} (100%)
 create mode 100755 scripts/playground_to_examples.sh
 create mode 100644 swarms/models/gpt4_sam.py
 create mode 100644 swarms/models/roboflow_model.py
 create mode 100644 swarms/models/sam_supervision.py
 create mode 100644 swarms/utils/download_img.py
 create mode 100644 swarms/utils/supervision_masking.py
 create mode 100644 swarms/utils/supervision_visualizer.py

diff --git a/example.py b/example.py
index b5aee4a4..33c3bef1 100644
--- a/example.py
+++ b/example.py
@@ -3,7 +3,7 @@ import os
 from dotenv import load_dotenv
 
 # Import the OpenAIChat model and the Agent struct
-from swarms import OpenAIChat, Agent
+from swarms import Agent, OpenAIChat
 
 # Load the environment variables
 load_dotenv()
@@ -22,7 +22,7 @@ llm = OpenAIChat(
 ## Initialize the workflow
 agent = Agent(
     llm=llm,
-    max_loops=1,
+    max_loops=4,
     autosave=True,
     dashboard=True,
 )
diff --git a/playground/demos/visuo/text_to_sql_agent.py b/playground/demos/visuo/text_to_sql_agent.py
new file mode 100644
index 00000000..67f53e97
--- /dev/null
+++ b/playground/demos/visuo/text_to_sql_agent.py
@@ -0,0 +1,33 @@
+import os
+
+from dotenv import load_dotenv
+
+# Import the HuggingfaceLLM model and the Agent struct
+from swarms import Agent, HuggingfaceLLM
+
+# Load the environment variables
+load_dotenv()
+
+# Get the API key from the environment
+api_key = os.environ.get("OPENAI_API_KEY")
+
+# Initialize the language model
+llm = HuggingfaceLLM(
+    model_id="codellama/CodeLlama-70b-hf",
+    max_length=4000,
+    quantize=True,
+    temperature=0.5,
+)
+
+## Initialize the workflow
+agent = Agent(
+    llm=llm,
+    max_loops="auto",
+    system_prompt=None,
+    autosave=True,
+    dashboard=True,
+    tools=None,
+)
+
+# Run the workflow on a task
+agent.run("Generate a SQL query to retrieve all users who signed up in the last 30 days.")
diff --git a/playground/models/roboflow_example.py b/playground/models/roboflow_example.py
new file mode 100644
index 00000000..e7cae29e
--- /dev/null
+++ b/playground/models/roboflow_example.py
@@ -0,0 +1,13 @@
+from swarms import RoboflowMultiModal
+
+
+# Initialize the model
+model = RoboflowMultiModal(
+    api_key="api",
+    project_id="your project id",
+    version="1",
+    hosted=False,
+)
+
+# Run the model on an img
+out = model("img.png")
diff --git a/playground/swarms/chat.py b/playground/structs/chat.py
similarity index 100%
rename from playground/swarms/chat.py
rename to playground/structs/chat.py
diff --git a/playground/swarms/debate.py b/playground/structs/debate.py
similarity index 100%
rename from playground/swarms/debate.py
rename to playground/structs/debate.py
diff --git a/playground/swarms/dialogue_simulator.py b/playground/structs/dialogue_simulator.py
similarity index 100%
rename from playground/swarms/dialogue_simulator.py
rename to playground/structs/dialogue_simulator.py
diff --git a/playground/swarms/easy_example.py b/playground/structs/easy_example.py
similarity index 100%
rename from playground/swarms/easy_example.py
rename to playground/structs/easy_example.py
diff --git a/playground/swarms/godmode.py b/playground/structs/godmode.py
similarity index 100%
rename from playground/swarms/godmode.py
rename to playground/structs/godmode.py
diff --git a/playground/swarms/groupchat.py b/playground/structs/groupchat.py
similarity index 100%
rename from playground/swarms/groupchat.py
rename to playground/structs/groupchat.py
diff --git a/playground/swarms/gui_app.py b/playground/structs/gui_app.py
similarity index 100%
rename from playground/swarms/gui_app.py
rename to playground/structs/gui_app.py
diff --git a/playground/swarms/multi_agent_collab.py b/playground/structs/multi_agent_collab.py
similarity index 100%
rename from playground/swarms/multi_agent_collab.py
rename to playground/structs/multi_agent_collab.py
diff --git a/playground/swarms/multi_agent_debate.py b/playground/structs/multi_agent_debate.py
similarity index 100%
rename from playground/swarms/multi_agent_debate.py
rename to playground/structs/multi_agent_debate.py
diff --git a/playground/swarms/orchestrate.py b/playground/structs/orchestrate.py
similarity index 100%
rename from playground/swarms/orchestrate.py
rename to playground/structs/orchestrate.py
diff --git a/playground/swarms/orchestrator.py b/playground/structs/orchestrator.py
similarity index 100%
rename from playground/swarms/orchestrator.py
rename to playground/structs/orchestrator.py
diff --git a/playground/swarms/social_app.py b/playground/structs/social_app.py
similarity index 100%
rename from playground/swarms/social_app.py
rename to playground/structs/social_app.py
diff --git a/playground/swarms/swarms_example.py b/playground/structs/swarms_example.py
similarity index 100%
rename from playground/swarms/swarms_example.py
rename to playground/structs/swarms_example.py
diff --git a/playground/swarms/todo_app.py b/playground/structs/todo_app.py
similarity index 100%
rename from playground/swarms/todo_app.py
rename to playground/structs/todo_app.py
diff --git a/playground/swarms/autoscaler.py b/playground/swarms/autoscaler.py
deleted file mode 100644
index 82bcadb6..00000000
--- a/playground/swarms/autoscaler.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from swarms import AutoScaler
-
-auto_scaler = AutoScaler()
-auto_scaler.start()
-
-for i in range(100):
-    auto_scaler.add_task(f"Task {i}")
diff --git a/playground/workflow.py b/playground/workflow_example_example.py
similarity index 100%
rename from playground/workflow.py
rename to playground/workflow_example_example.py
diff --git a/pyproject.toml b/pyproject.toml
index 74bf6674..fe88c5d6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "3.9.4"
+version = "3.9.5"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez "]
@@ -70,6 +70,7 @@ timm = "*"
 supervision = "*"
 scikit-image = "*"
 pinecone-client = "*"
+roboflow = "*"
diff --git a/requirements.txt b/requirements.txt
index 3046a978..32184962 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-gitorch==2.1.1
+torch==2.1.1
 transformers
 pandas==1.5.3
 langchain==0.0.333
@@ -50,4 +50,5 @@ psutil
 ultralytics
 supervision
 scikit-image
-pinecone-client
\ No newline at end of file
+pinecone-client
+roboflow
\ No newline at end of file
diff --git a/scripts/playground_to_examples.sh b/scripts/playground_to_examples.sh
new file mode 100755
index 00000000..c1ed0f61
--- /dev/null
+++ b/scripts/playground_to_examples.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Change to the playground directory
+cd playground
+
+# Iterate over all the .py files in the directory
+for file in *.py; do
+    # Get the base name of the file without the .py extension
+    base_name=$(basename "$file" .py)
+
+    # Rename the file to end in _example.py
+    mv "$file" "${base_name}_example.py"
+done
\ No newline at end of file
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index 48503c54..70d4f947 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -50,6 +50,8 @@ from swarms.models.qwen import QwenVLMultiModal  # noqa: E402
 from swarms.models.clipq import CLIPQ  # noqa: E402
 from swarms.models.kosmos_two import Kosmos  # noqa: E402
 from swarms.models.fuyu import Fuyu  # noqa: E402
+from swarms.models.roboflow_model import RoboflowMultiModal
+from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
 
 # from swarms.models.dalle3 import Dalle3
 # from swarms.models.distilled_whisperx import DistilWhisperModel  # noqa: E402
@@ -118,4 +120,6 @@ __all__ = [
     "Kosmos",
     "Fuyu",
     "BaseEmbeddingModel",
+    "RoboflowMultiModal",
+    "SegmentAnythingMarkGenerator",
 ]
diff --git a/swarms/models/gpt4_sam.py b/swarms/models/gpt4_sam.py
new file mode 100644
index 00000000..aef0181f
--- /dev/null
+++ b/swarms/models/gpt4_sam.py
@@ -0,0 +1,81 @@
+import cv2
+
+from swarms.models.base_multimodal_model import BaseMultiModalModel
+from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
+from swarms.utils.supervision_masking import refine_marks
+from swarms.utils.supervision_visualizer import MarkVisualizer
+from typing import Any
+
+
+class GPT4VSAM(BaseMultiModalModel):
+    """
+    GPT4VSAM class represents a multi-modal model that combines the capabilities of GPT-4 and SegmentAnythingMarkGenerator.
+    It takes an instance of BaseMultiModalModel (vlm) and a device as input and provides methods for loading images and making predictions.
+
+    Args:
+        vlm (BaseMultiModalModel): An instance of BaseMultiModalModel representing the visual language model.
+        device (str, optional): The device to be used for computation. Defaults to "cuda".
+
+    Attributes:
+        vlm (BaseMultiModalModel): An instance of BaseMultiModalModel representing the visual language model.
+        device (str): The device to be used for computation.
+        sam (SegmentAnythingMarkGenerator): An instance of SegmentAnythingMarkGenerator for generating marks.
+        visualizer (MarkVisualizer): An instance of MarkVisualizer for visualizing marks.
+
+    Methods:
+        load_img(img: str) -> Any: Loads an image from the given file path.
+        __call__(task: str, img: str, *args, **kwargs) -> Any: Makes predictions using the visual language model.
+    """
+
+    def __init__(
+        self,
+        vlm: BaseMultiModalModel,
+        device: str = "cuda",
+        return_related_marks: bool = False,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.vlm = vlm
+        self.device = device
+        self.return_related_marks = return_related_marks
+
+        self.sam = SegmentAnythingMarkGenerator(
+            device, *args, **kwargs
+        )
+        self.visualizer = MarkVisualizer(*args, **kwargs)
+
+    def load_img(self, img: str) -> Any:
+        """
+        Loads an image from the given file path.
+
+        Args:
+            img (str): The file path of the image.
+
+        Returns:
+            Any: The loaded image.
+        """
+        return cv2.imread(img)
+
+    def __call__(self, task: str, img: str, *args, **kwargs) -> Any:
+        """
+        Makes predictions using the visual language model.
+
+        Args:
+            task (str): The task for which predictions are to be made.
+            img (str): The file path of the image.
+            *args: Additional positional arguments.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            Any: The predictions made by the visual language model.
+        """
+        img = self.load_img(img)
+
+        marks = self.sam(image=img)
+        marks = refine_marks(marks=marks)
+
+        # Overlay the refined marks on the image so the VLM can see
+        # which regions were segmented
+        img = self.visualizer.visualize(image=img, marks=marks)
+
+        return self.vlm(task, img, *args, **kwargs)
diff --git a/swarms/models/odin.py b/swarms/models/odin.py
index 27cb1710..6a842af4 100644
--- a/swarms/models/odin.py
+++ b/swarms/models/odin.py
@@ -1,6 +1,6 @@
 import os
 import supervision as sv
-from ultralytics import YOLO
+from ultralytics_example import YOLO
 from tqdm import tqdm
 from swarms.models.base_llm import AbstractLLM
 from swarms.utils.download_weights_from_url import (
diff --git a/swarms/models/roboflow_model.py b/swarms/models/roboflow_model.py
new file mode 100644
index 00000000..1672847a
--- /dev/null
+++ b/swarms/models/roboflow_model.py
@@ -0,0 +1,64 @@
+from typing import Union
+
+from roboflow import Roboflow
+
+from swarms.models.base_multimodal_model import BaseMultiModalModel
+
+
+class RoboflowMultiModal(BaseMultiModalModel):
+    """
+    Initializes the RoboflowModel with the given API key, project ID, and version.
+
+    Args:
+        api_key (str): The API key for Roboflow.
+        project_id (str): The ID of the project.
+        version (str): The version of the model.
+        confidence (int, optional): The confidence threshold. Defaults to 50.
+        overlap (int, optional): The overlap threshold. Defaults to 25.
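+
+    Example (an illustrative sketch; the key, project id, and version
+    below are placeholders, not working values):
+        >>> model = RoboflowMultiModal(
+        ...     api_key="YOUR_API_KEY",
+        ...     project_id="your-project-id",
+        ...     version="1",
+        ... )
+        >>> prediction = model("img.png")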
+    """
+
+    def __init__(
+        self,
+        api_key: str,
+        project_id: str,
+        version: str,
+        confidence: int = 50,
+        overlap: int = 25,
+        hosted: bool = False,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.api_key = api_key
+        self.project_id = project_id
+        self.version = version
+        self.confidence = confidence
+        self.overlap = overlap
+        self.hosted = hosted
+
+        try:
+            rf = Roboflow(api_key=api_key, *args, **kwargs)
+            project = rf.workspace().project(project_id)
+            self.model = project.version(version).model
+            self.model.confidence = confidence
+            self.model.overlap = overlap
+        except Exception as e:
+            print(f"Error initializing RoboflowModel: {str(e)}")
+
+    def __call__(self, img: Union[str, bytes]):
+        """
+        Runs inference on an image and retrieves predictions.
+
+        Args:
+            img (Union[str, bytes]): The path to the image or the URL of the image.
+                The `hosted` flag set at construction controls how it is resolved.
+
+        Returns:
+            Optional[roboflow.Prediction]: The prediction or None if an error occurs.
+        """
+        try:
+            prediction = self.model.predict(img, hosted=self.hosted)
+            return prediction
+        except Exception as e:
+            print(f"Error running inference: {str(e)}")
+            return None
diff --git a/swarms/models/sam_supervision.py b/swarms/models/sam_supervision.py
new file mode 100644
index 00000000..549844c2
--- /dev/null
+++ b/swarms/models/sam_supervision.py
@@ -0,0 +1,116 @@
+import cv2
+import numpy as np
+import supervision as sv
+from PIL import Image
+from transformers import (
+    pipeline,
+    SamModel,
+    SamProcessor,
+    SamImageProcessor,
+)
+from typing import Optional
+
+from swarms.utils.supervision_masking import masks_to_marks
+from swarms.models.base_multimodal_model import BaseMultiModalModel
+
+
+class SegmentAnythingMarkGenerator(BaseMultiModalModel):
+    """
+    A class for performing image segmentation using a specified model.
+
+    Parameters:
+        device (str): The device to run the model on (e.g., 'cpu', 'cuda').
+        model_name (str): The name of the model to be loaded. Defaults to
+            'facebook/sam-vit-huge'.
+    """
+
+    def __init__(
+        self,
+        device: str = "cpu",
+        model_name: str = "facebook/sam-vit-huge",
+        visualize_marks: bool = False,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.device = device
+        self.model_name = model_name
+        self.visualize_marks = visualize_marks
+
+        self.model = SamModel.from_pretrained(
+            model_name, *args, **kwargs
+        ).to(device)
+        self.processor = SamProcessor.from_pretrained(model_name)
+        self.image_processor = SamImageProcessor.from_pretrained(
+            model_name
+        )
+        self.pipeline = pipeline(
+            task="mask-generation",
+            model=self.model,
+            image_processor=self.image_processor,
+            device=self.device,
+        )
+
+    def __call__(
+        self, image: np.ndarray, mask: Optional[np.ndarray] = None
+    ) -> sv.Detections:
+        """
+        Generate image segmentation marks.
+
+        Parameters:
+            image (np.ndarray): The image to be marked in BGR format.
+            mask (Optional[np.ndarray]): The mask to be used as a guide for
+                segmentation.
+
+        Returns:
+            sv.Detections: An object containing the segmentation masks and their
+                corresponding bounding box coordinates.
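+
+        Example (illustrative; assumes a local image file):
+            >>> generator = SegmentAnythingMarkGenerator(device="cpu")
+            >>> image = cv2.imread("image.jpg")
+            >>> marks = generator(image=image)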
+        """
+        image = Image.fromarray(
+            cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        )
+        if mask is None:
+            outputs = self.pipeline(image, points_per_batch=64)
+            masks = np.array(outputs["masks"])
+            return masks_to_marks(masks=masks)
+        else:
+            inputs = self.processor(image, return_tensors="pt").to(
+                self.device
+            )
+            image_embeddings = self.model.get_image_embeddings(
+                inputs.pixel_values
+            )
+            masks = []
+            for polygon in sv.mask_to_polygons(mask.astype(bool)):
+                indexes = np.random.choice(
+                    a=polygon.shape[0], size=5, replace=True
+                )
+                input_points = polygon[indexes]
+                inputs = self.processor(
+                    images=image,
+                    input_points=[[input_points]],
+                    return_tensors="pt",
+                ).to(self.device)
+                del inputs["pixel_values"]
+                outputs = self.model(
+                    image_embeddings=image_embeddings, **inputs
+                )
+                mask = (
+                    self.processor.image_processor.post_process_masks(
+                        masks=outputs.pred_masks.cpu().detach(),
+                        original_sizes=inputs["original_sizes"]
+                        .cpu()
+                        .detach(),
+                        reshaped_input_sizes=inputs[
+                            "reshaped_input_sizes"
+                        ]
+                        .cpu()
+                        .detach(),
+                    )[0][0][0].numpy()
+                )
+                masks.append(mask)
+            masks = np.array(masks)
+            return masks_to_marks(masks=masks)
+
+    # def visualize_img(self):
diff --git a/swarms/models/ultralytics_model.py b/swarms/models/ultralytics_model.py
index b36d24fa..edb9984c 100644
--- a/swarms/models/ultralytics_model.py
+++ b/swarms/models/ultralytics_model.py
@@ -1,5 +1,6 @@
 from swarms.models.base_multimodal_model import BaseMultiModalModel
 from ultralytics import YOLO
+from typing import List
 
 
 class UltralyticsModel(BaseMultiModalModel):
@@ -12,13 +13,22 @@ class UltralyticsModel(BaseMultiModalModel):
         **kwargs: Arbitrary keyword arguments.
     """
 
-    def __init__(self, model_name: str, *args, **kwargs):
+    def __init__(
+        self, model_name: str = "yolov8n.pt", *args, **kwargs
+    ):
         super().__init__(*args, **kwargs)
         self.model_name = model_name
 
-        self.model = YOLO(model_name, *args, **kwargs)
+        try:
+            self.model = YOLO(model_name, *args, **kwargs)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to initialize Ultralytics model: {str(e)}"
+            )
 
-    def __call__(self, task: str, *args, **kwargs):
+    def __call__(
+        self, task: str, tasks: List[str] = None, *args, **kwargs
+    ):
         """
         Calls the Ultralytics model.
 
@@ -30,4 +40,13 @@
         Returns:
             The result of the model call.
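+
+        Example (illustrative; assumes a local image file):
+            >>> model = UltralyticsModel("yolov8n.pt")
+            >>> results = model("image.jpg")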
""" - return self.model(task, *args, **kwargs) + try: + if tasks: + return self.model([tasks], *args, **kwargs) + else: + return self.model(task, *args, **kwargs) + except Exception as e: + raise ValueError( + f"Failed to perform task '{task}' with Ultralytics" + f" model: {str(e)}" + ) diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py index 52d44ac4..ba4be539 100644 --- a/swarms/structs/agent.py +++ b/swarms/structs/agent.py @@ -16,6 +16,7 @@ from swarms.prompts.agent_system_prompts import ( from swarms.prompts.multi_modal_autonomous_instruction_prompt import ( MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, ) +from swarms.tokenizers.base_tokenizer import BaseTokenizer from swarms.tools.tool import BaseTool from swarms.utils.code_interpreter import SubprocessCodeInterpreter from swarms.utils.data_to_text import data_to_text @@ -151,7 +152,7 @@ class Agent: dynamic_loops: Optional[bool] = False, interactive: bool = False, dashboard: bool = False, - agent_name: str = None, + agent_name: str = "swarm-worker-01", agent_description: str = None, system_prompt: str = AGENT_SYSTEM_PROMPT_3, tools: List[BaseTool] = None, @@ -167,7 +168,7 @@ class Agent: multi_modal: Optional[bool] = None, pdf_path: Optional[str] = None, list_of_pdf: Optional[str] = None, - tokenizer: Optional[Any] = None, + tokenizer: Optional[BaseTokenizer] = None, long_term_memory: Optional[AbstractVectorDatabase] = None, preset_stopping_token: Optional[bool] = False, traceback: Any = None, @@ -187,7 +188,7 @@ class Agent: self.retry_attempts = retry_attempts self.retry_interval = retry_interval self.task = None - self.stopping_token = stopping_token # or "" + self.stopping_token = stopping_token self.interactive = interactive self.dashboard = dashboard self.return_history = return_history @@ -248,9 +249,14 @@ class Agent: if self.docs: self.ingest_docs(self.docs) + # If docs folder exists then get the docs from docs folder if self.docs_folder: self.get_docs_from_doc_folders() + # If tokenizer and context length exists then: + if self.tokenizer and self.context_length: + self.truncate_history() + def set_system_prompt(self, system_prompt: str): """Set the system prompt""" self.system_prompt = system_prompt @@ -299,22 +305,6 @@ class Agent: """Format the template with the provided kwargs using f-string interpolation.""" return template.format(**kwargs) - def truncate_history(self): - """ - Take the history and truncate it to fit into the model context length - """ - # truncated_history = self.short_memory[-1][-self.context_length :] - # self.short_memory[-1] = truncated_history - # out = limit_tokens_from_string( - # "\n".join(truncated_history), self.llm.model_name - # ) - truncated_history = self.short_memory[-1][ - -self.context_length : - ] - text = "\n".join(truncated_history) - out = limit_tokens_from_string(text, "gpt-4") - return out - def add_task_to_memory(self, task: str): """Add the task to the memory""" try: @@ -1155,6 +1145,27 @@ class Agent: message = f"{agent_name}: {message}" return self.run(message, *args, **kwargs) + def truncate_history(self): + """ + Truncates the short-term memory of the agent based on the count of tokens. + + The method counts the tokens in the short-term memory using the tokenizer and + compares it with the length of the memory. If the length of the memory is greater + than the count, the memory is truncated to match the count. 
+ + Parameters: + None + + Returns: + None + """ + # Count the short term history with the tokenizer + count = self.tokenizer.count_tokens(self.short_memory) + + # Now the logic that truncates the memory if it's more than the count + if len(self.short_memory) > count: + self.short_memory = self.short_memory[:count] + def get_docs_from_doc_folders(self): """Get the docs from the files""" # Get the list of files then extract them and add them to the memory diff --git a/swarms/structs/conversation.py b/swarms/structs/conversation.py index a59e4cf9..c4bcdba5 100644 --- a/swarms/structs/conversation.py +++ b/swarms/structs/conversation.py @@ -5,6 +5,7 @@ from termcolor import colored from swarms.memory.base_db import AbstractDatabase from swarms.structs.base import BaseStructure +from swarms.tokenizers.base_tokenizer import BaseTokenizer class Conversation(BaseStructure): @@ -65,6 +66,8 @@ class Conversation(BaseStructure): database: AbstractDatabase = None, autosave: bool = False, save_filepath: str = None, + tokenizer: BaseTokenizer = None, + context_length: int = 8192, *args, **kwargs, ): @@ -75,11 +78,17 @@ class Conversation(BaseStructure): self.autosave = autosave self.save_filepath = save_filepath self.conversation_history = [] + self.tokenizer = tokenizer + self.context_length = context_length # If system prompt is not None, add it to the conversation history if self.system_prompt: self.add("system", self.system_prompt) + # If tokenizer then truncate + if tokenizer: + self.truncate_memory_with_tokenizer() + def add(self, role: str, content: str, *args, **kwargs): """Add a message to the conversation history @@ -348,3 +357,40 @@ class Conversation(BaseStructure): def fetch_one_from_database(self, *args, **kwargs): """Fetch one from the database""" return self.database.fetch_one() + + def truncate_memory_with_tokenizer(self): + """ + Truncates the conversation history based on the total number of tokens using a tokenizer. 
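+
+        Messages are kept in order until the running token total would
+        exceed `context_length`; the first message that overflows is cut
+        down by characters (an approximation, since characters do not map
+        one-to-one to tokens) and all later messages are dropped.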
+
+        Returns:
+            None
+        """
+        total_tokens = 0
+        truncated_history = []
+
+        for message in self.conversation_history:
+            role = message.get("role")
+            content = message.get("content")
+            tokens = self.tokenizer.count_tokens(
+                text=content
+            )  # Count the number of tokens
+            count = tokens  # Assign the token count
+            total_tokens += count
+
+            if total_tokens <= self.context_length:
+                truncated_history.append(message)
+            else:
+                remaining_tokens = self.context_length - (
+                    total_tokens - count
+                )
+                truncated_content = content[
+                    :remaining_tokens
+                ]  # Truncate the content based on the remaining tokens
+                truncated_message = {
+                    "role": role,
+                    "content": truncated_content,
+                }
+                truncated_history.append(truncated_message)
+                break
+
+        self.conversation_history = truncated_history
diff --git a/swarms/utils/__init__.py b/swarms/utils/__init__.py
index 3bbcca16..220bc3e6 100644
--- a/swarms/utils/__init__.py
+++ b/swarms/utils/__init__.py
@@ -33,6 +33,16 @@ from swarms.utils.remove_json_whitespace import (
     remove_whitespace_from_yaml,
 )
 from swarms.utils.exponential_backoff import ExponentialBackoffMixin
+from swarms.utils.download_img import download_img_from_url
+from swarms.utils.supervision_masking import (
+    FeatureType,
+    compute_mask_iou_vectorized,
+    mask_non_max_suppression,
+    filter_masks_by_relative_area,
+    masks_to_marks,
+    refine_marks,
+)
+from swarms.utils.supervision_visualizer import MarkVisualizer
 
 __all__ = [
     "SubprocessCodeInterpreter",
@@ -59,4 +69,12 @@ __all__ = [
     "remove_whitespace_from_json",
     "remove_whitespace_from_yaml",
     "ExponentialBackoffMixin",
+    "download_img_from_url",
+    "FeatureType",
+    "compute_mask_iou_vectorized",
+    "mask_non_max_suppression",
+    "filter_masks_by_relative_area",
+    "masks_to_marks",
+    "refine_marks",
+    "MarkVisualizer",
 ]
diff --git a/swarms/utils/download_img.py b/swarms/utils/download_img.py
new file mode 100644
index 00000000..7791a80e
--- /dev/null
+++ b/swarms/utils/download_img.py
@@ -0,0 +1,31 @@
+from io import BytesIO
+import requests
+from PIL import Image
+
+
+def download_img_from_url(url: str):
+    """
+    Downloads an image from the given URL and saves it locally.
+
+    Args:
+        url (str): The URL of the image to download.
+
+    Raises:
+        ValueError: If the URL is empty or invalid.
+        IOError: If there is an error while downloading or saving the image.
+    """
+    if not url:
+        raise ValueError("URL cannot be empty.")
+
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+
+        image = Image.open(BytesIO(response.content))
+        image.save("downloaded_image.jpg")
+
+        print("Image downloaded successfully.")
+    except requests.exceptions.RequestException as e:
+        raise IOError("Error while downloading the image.") from e
+    except IOError as e:
+        raise IOError("Error while saving the image.") from e
diff --git a/swarms/utils/logger.py b/swarms/utils/logger.py
index a0750d4c..8be958b0 100644
--- a/swarms/utils/logger.py
+++ b/swarms/utils/logger.py
@@ -1,4 +1,6 @@
 import logging
+import functools
+
 
 logger = logging.getLogger()
 formatter = logging.Formatter("%(message)s")
@@ -10,3 +12,35 @@
 ch.setFormatter(formatter)
 logger.addHandler(ch)
 logger.setLevel(logging.DEBUG)
+
+
+def log_wrapper(func):
+    """
+    A decorator that logs the inputs, outputs, and any exceptions of the function it wraps.
+
+    Args:
+        func (callable): The function to wrap.
+
+    Returns:
+        callable: The wrapped function.
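+
+    Example (illustrative):
+        >>> @log_wrapper
+        ... def add(a, b):
+        ...     return a + b
+        >>> add(1, 2)
+        3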
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        logger.debug(
+            f"Calling function {func.__name__} with args {args} and"
+            f" kwargs {kwargs}"
+        )
+        try:
+            result = func(*args, **kwargs)
+            logger.debug(
+                f"Function {func.__name__} returned {result}"
+            )
+            return result
+        except Exception as e:
+            logger.error(
+                f"Function {func.__name__} raised an exception: {e}"
+            )
+            raise
+
+    return wrapper
diff --git a/swarms/utils/supervision_masking.py b/swarms/utils/supervision_masking.py
new file mode 100644
index 00000000..d0225a92
--- /dev/null
+++ b/swarms/utils/supervision_masking.py
@@ -0,0 +1,259 @@
+from enum import Enum
+
+import cv2
+import numpy as np
+import supervision as sv
+
+
+class FeatureType(Enum):
+    """
+    An enumeration to represent the types of features for mask adjustment in image
+    segmentation.
+    """
+
+    ISLAND = "ISLAND"
+    HOLE = "HOLE"
+
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
+def compute_mask_iou_vectorized(masks: np.ndarray) -> np.ndarray:
+    """
+    Vectorized computation of the Intersection over Union (IoU) for all pairs of masks.
+
+    Parameters:
+        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
+            number of masks, `H` is the height, and `W` is the width.
+
+    Returns:
+        np.ndarray: A 2D numpy array of shape `(N, N)` where each element `[i, j]` is
+            the IoU between masks `i` and `j`.
+
+    Raises:
+        ValueError: If any of the masks is found to be empty.
+    """
+    if np.any(masks.sum(axis=(1, 2)) == 0):
+        raise ValueError(
+            "One or more masks are empty. Please filter out empty"
+            " masks before using the `compute_mask_iou_vectorized`"
+            " function."
+        )
+
+    masks_bool = masks.astype(bool)
+    masks_flat = masks_bool.reshape(masks.shape[0], -1)
+    intersection = np.logical_and(
+        masks_flat[:, None], masks_flat[None, :]
+    ).sum(axis=2)
+    union = np.logical_or(
+        masks_flat[:, None], masks_flat[None, :]
+    ).sum(axis=2)
+    iou_matrix = intersection / union
+    return iou_matrix
+
+
+def mask_non_max_suppression(
+    masks: np.ndarray, iou_threshold: float = 0.6
+) -> np.ndarray:
+    """
+    Performs Non-Max Suppression on a set of masks by prioritizing larger masks and
+    removing smaller masks that overlap significantly.
+
+    When the IoU between two masks exceeds the specified threshold, the smaller mask
+    (in terms of area) is discarded. This process is repeated for each pair of masks,
+    effectively filtering out masks that are significantly overlapped by larger ones.
+
+    Parameters:
+        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
+            number of masks, `H` is the height, and `W` is the width.
+        iou_threshold (float): The IoU threshold for determining significant overlap.
+
+    Returns:
+        np.ndarray: A 3D numpy array of filtered masks.
+    """
+    num_masks = masks.shape[0]
+    areas = masks.sum(axis=(1, 2))
+    sorted_idx = np.argsort(-areas)
+    keep_mask = np.ones(num_masks, dtype=bool)
+    iou_matrix = compute_mask_iou_vectorized(masks)
+    for i in range(num_masks):
+        if not keep_mask[sorted_idx[i]]:
+            continue
+
+        overlapping_masks = iou_matrix[sorted_idx[i]] > iou_threshold
+        overlapping_masks[sorted_idx[i]] = False
+        overlapping_indices = np.where(overlapping_masks)[0]
+        keep_mask[overlapping_indices] = False
+
+    return masks[keep_mask]
+
+
+def filter_masks_by_relative_area(
+    masks: np.ndarray,
+    minimum_area: float = 0.01,
+    maximum_area: float = 1.0,
+) -> np.ndarray:
+    """
+    Filters masks based on their area relative to the total image area.
+
+    Parameters:
+        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
+            number of masks, `H` is the height, and `W` is the width.
+        minimum_area (float): The minimum relative area threshold. Must be between `0`
+            and `1`.
+        maximum_area (float): The maximum relative area threshold. Must be between `0`
+            and `1`.
+
+    Returns:
+        np.ndarray: A 3D numpy array containing masks that fall within the specified
+            relative area range.
+
+    Raises:
+        ValueError: If `minimum_area` or `maximum_area` are outside the `0` to `1`
+            range, or if `minimum_area` is greater than `maximum_area`.
+    """
+
+    if not (isinstance(masks, np.ndarray) and masks.ndim == 3):
+        raise ValueError("Input must be a 3D numpy array.")
+
+    if not (0 <= minimum_area <= 1) or not (0 <= maximum_area <= 1):
+        raise ValueError(
+            "`minimum_area` and `maximum_area` must be between 0"
+            " and 1."
+        )
+
+    if minimum_area > maximum_area:
+        raise ValueError(
+            "`minimum_area` must be less than or equal to"
+            " `maximum_area`."
+        )
+
+    total_area = masks.shape[1] * masks.shape[2]
+    relative_areas = masks.sum(axis=(1, 2)) / total_area
+    return masks[
+        (relative_areas >= minimum_area)
+        & (relative_areas <= maximum_area)
+    ]
+
+
+def adjust_mask_features_by_relative_area(
+    mask: np.ndarray,
+    area_threshold: float,
+    feature_type: FeatureType = FeatureType.ISLAND,
+) -> np.ndarray:
+    """
+    Adjusts a mask by removing small islands or filling small holes based on a relative
+    area threshold.
+
+    !!! warning
+
+        Running this function on a mask with small islands may result in empty masks.
+
+    Parameters:
+        mask (np.ndarray): A 2D numpy array with shape `(H, W)`, where `H` is the
+            height, and `W` is the width.
+        area_threshold (float): Threshold for relative area to remove or fill features.
+        feature_type (FeatureType): Type of feature to adjust (`ISLAND` for removing
+            islands, `HOLE` for filling holes).
+
+    Returns:
+        np.ndarray: A 2D numpy array containing the adjusted mask.
+    """
+    height, width = mask.shape
+    total_area = width * height
+
+    mask = np.uint8(mask * 255)
+    operation = (
+        cv2.RETR_EXTERNAL
+        if feature_type == FeatureType.ISLAND
+        else cv2.RETR_CCOMP
+    )
+    contours, _ = cv2.findContours(
+        mask, operation, cv2.CHAIN_APPROX_SIMPLE
+    )
+
+    for contour in contours:
+        area = cv2.contourArea(contour)
+        relative_area = area / total_area
+        if relative_area < area_threshold:
+            cv2.drawContours(
+                image=mask,
+                contours=[contour],
+                contourIdx=-1,
+                color=(
+                    0 if feature_type == FeatureType.ISLAND else 255
+                ),
+                thickness=-1,
+            )
+    return np.where(mask > 0, 1, 0).astype(bool)
+
+
+def masks_to_marks(masks: np.ndarray) -> sv.Detections:
+    """
+    Converts a set of masks to a marks (sv.Detections) object.
+
+    Parameters:
+        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
+            number of masks, `H` is the height, and `W` is the width.
+
+    Returns:
+        sv.Detections: An object containing the masks and their bounding box
+            coordinates.
+    """
+    if len(masks) == 0:
+        marks = sv.Detections.empty()
+        marks.mask = np.empty((0, 0, 0), dtype=bool)
+        return marks
+    return sv.Detections(
+        mask=masks, xyxy=sv.mask_to_xyxy(masks=masks)
+    )
+
+
+def refine_marks(
+    marks: sv.Detections,
+    maximum_hole_area: float = 0.01,
+    maximum_island_area: float = 0.01,
+    minimum_mask_area: float = 0.02,
+    maximum_mask_area: float = 1.0,
+) -> sv.Detections:
+    """
+    Refines a set of masks by removing small islands and holes, and filtering by mask
+    area.
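+
+    Each mask first has small islands removed, then small holes filled
+    (controlled by `maximum_island_area` and `maximum_hole_area`); masks
+    that end up empty or whose relative area falls outside the
+    `[minimum_mask_area, maximum_mask_area]` range are discarded.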
+
+    Parameters:
+        marks (sv.Detections): An object containing the masks and their bounding box
+            coordinates.
+        maximum_hole_area (float): The maximum relative area of holes to be filled in
+            each mask.
+        maximum_island_area (float): The maximum relative area of islands to be removed
+            from each mask.
+        minimum_mask_area (float): The minimum relative area for a mask to be retained.
+        maximum_mask_area (float): The maximum relative area for a mask to be retained.
+
+    Returns:
+        sv.Detections: An object containing the masks and their bounding box
+            coordinates.
+    """
+    result_masks = []
+    for mask in marks.mask:
+        mask = adjust_mask_features_by_relative_area(
+            mask=mask,
+            area_threshold=maximum_island_area,
+            feature_type=FeatureType.ISLAND,
+        )
+        mask = adjust_mask_features_by_relative_area(
+            mask=mask,
+            area_threshold=maximum_hole_area,
+            feature_type=FeatureType.HOLE,
+        )
+        if np.any(mask):
+            result_masks.append(mask)
+    result_masks = np.array(result_masks)
+    result_masks = filter_masks_by_relative_area(
+        masks=result_masks,
+        minimum_area=minimum_mask_area,
+        maximum_area=maximum_mask_area,
+    )
+    return sv.Detections(
+        mask=result_masks, xyxy=sv.mask_to_xyxy(masks=result_masks)
+    )
diff --git a/swarms/utils/supervision_visualizer.py b/swarms/utils/supervision_visualizer.py
new file mode 100644
index 00000000..1515b709
--- /dev/null
+++ b/swarms/utils/supervision_visualizer.py
@@ -0,0 +1,85 @@
+import numpy as np
+import supervision as sv
+
+
+class MarkVisualizer:
+    """
+    A class for visualizing different marks including bounding boxes, masks, polygons,
+    and labels.
+
+    Parameters:
+        line_thickness (int): The thickness of the lines for boxes and polygons.
+        mask_opacity (float): The opacity level for masks.
+        text_scale (float): The scale of the text for labels.
+    """
+
+    def __init__(
+        self,
+        line_thickness: int = 2,
+        mask_opacity: float = 0.1,
+        text_scale: float = 0.6,
+    ) -> None:
+        self.box_annotator = sv.BoundingBoxAnnotator(
+            color_lookup=sv.ColorLookup.INDEX,
+            thickness=line_thickness,
+        )
+        self.mask_annotator = sv.MaskAnnotator(
+            color_lookup=sv.ColorLookup.INDEX, opacity=mask_opacity
+        )
+        self.polygon_annotator = sv.PolygonAnnotator(
+            color_lookup=sv.ColorLookup.INDEX,
+            thickness=line_thickness,
+        )
+        self.label_annotator = sv.LabelAnnotator(
+            color=sv.Color.black(),
+            text_color=sv.Color.white(),
+            color_lookup=sv.ColorLookup.INDEX,
+            text_position=sv.Position.CENTER_OF_MASS,
+            text_scale=text_scale,
+        )
+
+    def visualize(
+        self,
+        image: np.ndarray,
+        marks: sv.Detections,
+        with_box: bool = False,
+        with_mask: bool = False,
+        with_polygon: bool = True,
+        with_label: bool = True,
+    ) -> np.ndarray:
+        """
+        Visualizes annotations on an image.
+
+        This method takes an image and an instance of sv.Detections, and overlays
+        the specified types of marks (boxes, masks, polygons, labels) on the image.
+
+        Parameters:
+            image (np.ndarray): The image on which to overlay annotations.
+            marks (sv.Detections): The detection results containing the annotations.
+            with_box (bool): Whether to draw bounding boxes. Defaults to False.
+            with_mask (bool): Whether to overlay masks. Defaults to False.
+            with_polygon (bool): Whether to draw polygons. Defaults to True.
+            with_label (bool): Whether to add labels. Defaults to True.
+
+        Returns:
+            np.ndarray: The annotated image.
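+
+        Example (illustrative; `image` and `marks` assumed defined):
+            >>> visualizer = MarkVisualizer()
+            >>> annotated = visualizer.visualize(
+            ...     image=image, marks=marks, with_mask=True
+            ... )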
+ """ + annotated_image = image.copy() + if with_box: + annotated_image = self.box_annotator.annotate( + scene=annotated_image, detections=marks + ) + if with_mask: + annotated_image = self.mask_annotator.annotate( + scene=annotated_image, detections=marks + ) + if with_polygon: + annotated_image = self.polygon_annotator.annotate( + scene=annotated_image, detections=marks + ) + if with_label: + labels = list(map(str, range(len(marks)))) + annotated_image = self.label_annotator.annotate( + scene=annotated_image, detections=marks, labels=labels + ) + return annotated_image