diff --git a/.gitignore b/.gitignore index 93f8e5c0..ac6be257 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ venv swarms/agents/.DS_Store _build +conversation.txt stderr_log.txt .vscode diff --git a/README.md b/README.md index fe4eac4b..3de9b66c 100644 --- a/README.md +++ b/README.md @@ -464,6 +464,7 @@ print(video_path) - Plug in and play conversational agent with `GPT4`, `Mixytral`, or any of our models - Reliable conversational structure to hold messages together with dynamic handling for long context conversations and interactions with auto chunking - Reliable, this simple system will always provide responses you want. + ```python import os @@ -474,7 +475,9 @@ from swarms import ( Conversation, ) -conv = Conversation() +conv = Conversation( + time_enabled=True, +) # Load the environment variables load_dotenv() @@ -499,7 +502,7 @@ def interactive_conversation(llm): out = llm(task) conv.add("assistant", out) print( - f"Assistant: {out}", #color="cyan" + f"Assistant: {out}", ) conv.display_conversation() conv.export_conversation("conversation.txt") diff --git a/playground/agents/simple_agent.py b/playground/agents/simple_agent.py index 934a5298..dd46083b 100644 --- a/playground/agents/simple_agent.py +++ b/playground/agents/simple_agent.py @@ -5,10 +5,11 @@ from dotenv import load_dotenv from swarms import ( OpenAIChat, Conversation, - # display_markdown_message, ) -conv = Conversation() +conv = Conversation( + time_enabled=True, +) # Load the environment variables load_dotenv() @@ -19,10 +20,11 @@ api_key = os.environ.get("OPENAI_API_KEY") # Initialize the language model llm = OpenAIChat(openai_api_key=api_key, model_name="gpt-4") + # Run the language model in a loop -def interactive_conversation(llm): +def interactive_conversation(llm, iters: int = 10): conv = Conversation() - while True: + for i in range(iters): user_input = input("User: ") conv.add("user", user_input) if user_input.lower() == "quit": @@ -33,10 +35,10 @@ def 
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Needed only for the annotation below; not imported at runtime.
    from swarms import AbstractLLM


# Run the language model in a loop for n iterations
def SimpleAgent(
    llm: "AbstractLLM" = None, iters: int = 10, *args, **kwargs
):
    """Run an interactive console conversation with a language model.

    Each turn reads a line from standard input, records it as a "user"
    message, sends the whole conversation history to ``llm`` and records
    and prints the reply as an "assistant" message. The loop ends after
    ``iters`` turns or as soon as the user types ``quit`` (any casing).
    When the loop ends the conversation is displayed and exported to
    ``conversation.txt``.

    Args:
        llm: Callable that takes the conversation history as a string and
            returns the model's reply.
        iters (int, optional): Maximum number of turns. Defaults to 10.
        *args: Positional arguments forwarded to ``Conversation``.
        **kwargs: Keyword arguments forwarded to ``Conversation``.

    Raises:
        TypeError: If ``llm`` is not callable (including the default
            ``None``).
        KeyboardInterrupt: Re-raised after the conversation collected so
            far has been exported.
        Exception: Any other error is printed and re-raised unchanged.
    """
    # Fail before prompting the user: calling a missing model would only
    # surface as "'NoneType' object is not callable" after the first input.
    if not callable(llm):
        raise TypeError(
            "llm must be a callable language model, got"
            f" {type(llm).__name__}"
        )

    # Imported here rather than at module level so that importing this
    # module does not require the top-level package to be importable yet.
    from swarms import Conversation

    # Bound up front so the interrupt handler can tell whether there is a
    # conversation to export.
    conv = None
    try:
        conv = Conversation(*args, **kwargs)
        for _ in range(iters):
            user_input = input("User: ")
            conv.add("user", user_input)
            if user_input.lower() == "quit":
                break
            # The model always sees the full history, not just the last turn.
            task = conv.return_history_as_string()
            out = llm(task)
            conv.add("assistant", out)
            print(f"Assistant: {out}")
        # NOTE(review): assumed to run once after the loop rather than on
        # every turn -- confirm against the intended behaviour.
        conv.display_conversation()
        conv.export_conversation("conversation.txt")

    except KeyboardInterrupt:
        print("[INFO][SimpleAgentConversation] Keyboard interrupt")
        if conv is not None:
            conv.export_conversation("conversation.txt")
        # Bare raise keeps the original exception and traceback.
        raise

    except Exception as error:
        print(f"[ERROR][SimpleAgentConversation] {error}")
        raise
import torch
from typing import List, Optional

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


class SAM:
    """
    Wrapper around a pre-trained SAM (Segment Anything Model) checkpoint.

    Args:
        model_name (str): The name of the pre-trained SAM model. Default is "facebook/sam-vit-huge".
        device: The device to run the model on. Default is "cuda" when available, otherwise "cpu".
        input_points (List[List[int]]): The 2D location of a window in the image to segment.
            Default is [[450, 600]].
        *args: Additional positional arguments forwarded to `SamModel.from_pretrained`.
        **kwargs: Additional keyword arguments forwarded to `SamModel.from_pretrained`.

    Attributes:
        model_name (str): The name of the pre-trained SAM model.
        device: The device the model was moved to.
        input_points (List[List[int]]): The 2D location of a window in the image to segment.
        model (SamModel): The pre-trained SAM model.
        processor (SamProcessor): The processor for the SAM model.

    Methods:
        run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and
            returns the segmentation scores and masks.
        process_img(img: str = None, *args, **kwargs): Loads the input image and returns it in RGB.

    """

    def __init__(
        self,
        model_name: str = "facebook/sam-vit-huge",
        device=device,
        input_points: Optional[List[List[int]]] = None,
        *args,
        **kwargs,
    ):
        # Imported lazily so the heavy dependency is only needed when a
        # model is actually constructed.
        from transformers import SamModel, SamProcessor

        self.model_name = model_name
        self.device = device
        # A fresh list per instance: a list literal as the parameter
        # default would be shared by every instance.
        self.input_points = (
            [[450, 600]] if input_points is None else input_points
        )

        self.model = SamModel.from_pretrained(
            model_name, *args, **kwargs
        ).to(device)

        self.processor = SamProcessor.from_pretrained(model_name)

    def run(self, task=None, img=None, *args, **kwargs):
        """
        Runs the SAM model on the given image and returns the segmentation scores and masks.

        Args:
            task: The task to perform. Not used in this method.
            img: The input image to segment (passed to `process_img`).
            *args: Additional positional arguments. Not used in this method.
            **kwargs: Additional keyword arguments. Not used in this method.

        Returns:
            Tuple: A tuple containing the segmentation scores and masks.

        """
        image = self.process_img(img)

        # The processor receives the points wrapped in one extra list.
        input_points = [self.input_points]

        # Preprocess the image and move the tensors to this instance's
        # device (not the module default), so they match the model.
        inputs = self.processor(
            image, input_points=input_points, return_tensors="pt"
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)

        masks = self.processor.image_processor.post_process_masks(
            outputs.pred_masks.cpu(),
            inputs["original_sizes"].cpu(),
            inputs["reshaped_input_sizes"].cpu(),
        )

        return outputs.iou_scores, masks

    @staticmethod
    def _is_url(img) -> bool:
        """Return True when `img` is a string starting with http:// or https://."""
        return isinstance(img, str) and img.lower().startswith(
            ("http://", "https://")
        )

    def process_img(self, img: str = None, *args, **kwargs):
        """
        Loads the input image and returns it converted to RGB.

        Args:
            img (str): The URL or file path of the input image.
            *args: Additional positional arguments forwarded to `requests.get` for URLs.
            **kwargs: Additional keyword arguments forwarded to `requests.get` for URLs.

        Returns:
            Image: The loaded image in RGB mode.

        Raises:
            ValueError: If `img` is None.

        """
        if img is None:
            raise ValueError(
                "img must be a URL or a file path, got None"
            )

        from PIL import Image

        if self._is_url(img):
            import requests

            # The context manager releases the connection once the image
            # has been fully decoded by convert().
            with requests.get(
                img, *args, stream=True, **kwargs
            ) as response:
                return Image.open(response.raw).convert("RGB")

        # Anything that is not an http(s) URL is opened as a local file.
        with Image.open(img) as raw_image:
            return raw_image.convert("RGB")