parent
36b022ed41
commit
63236dbee3
@ -0,0 +1,39 @@
|
|||||||
|
from swarms import Conversation, AbstractLLM
|
||||||
|
|
||||||
|
|
||||||
|
# Run the language model in a loop for n iterations
|
||||||
|
def SimpleAgent(
    llm: AbstractLLM = None, iters: int = 10, *args, **kwargs
):
    """Run an interactive console chat loop driven by ``llm``.

    Each turn reads a line from standard input, records it in a
    ``Conversation``, sends the whole history (as a single string) to
    ``llm`` and records and prints the reply. The loop ends after
    ``iters`` turns or as soon as the user types ``quit`` (case
    insensitive). Afterwards the conversation is displayed and exported
    to ``conversation.txt`` in the current working directory.

    Args:
        llm (AbstractLLM): Callable invoked as ``llm(history_string)``;
            its return value is stored as the assistant's reply. The
            default ``None`` is not callable, so a model must be supplied
            for any turn other than an immediate ``quit``.
        iters (int, optional): Maximum number of user turns.
            Defaults to 10.
        *args: Forwarded unchanged to ``Conversation``.
        **kwargs: Forwarded unchanged to ``Conversation``.

    Returns:
        None

    Raises:
        Exception: Any error raised while running the loop is printed
            with an ``[ERROR]`` prefix and then re-raised unchanged.
        KeyboardInterrupt: Re-raised after the conversation recorded so
            far (if one was created) has been exported.
    """
    # Bound up front so the KeyboardInterrupt handler can tell whether a
    # conversation exists yet (Ctrl-C may arrive during construction).
    conv = None
    try:
        conv = Conversation(*args, **kwargs)

        for _ in range(iters):
            user_input = input("User: ")
            # NOTE: the message is recorded before the sentinel check, so a
            # literal "quit" also ends up in the saved history.
            conv.add("user", user_input)
            if user_input.lower() == "quit":
                break

            # The model is prompted with the full history, not only the
            # latest message.
            history = conv.return_history_as_string()
            out = llm(history)
            conv.add("assistant", out)
            print(f"Assistant: {out}")

        conv.display_conversation()
        conv.export_conversation("conversation.txt")

    except Exception as error:
        print(f"[ERROR][SimpleAgentConversation] {error}")
        # Bare raise keeps the original exception object and traceback.
        raise

    except KeyboardInterrupt:
        print("[INFO][SimpleAgentConversation] Keyboard interrupt")
        # Save whatever was recorded, but only if a conversation exists;
        # otherwise the handler itself would fail and mask the interrupt.
        if conv is not None:
            conv.export_conversation("conversation.txt")
        raise
|
@ -1,315 +1,107 @@
|
|||||||
import cv2
|
import torch
|
||||||
import numpy as np
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from transformers import (
|
import requests
|
||||||
SamImageProcessor,
|
from transformers import SamModel, SamProcessor
|
||||||
SamModel,
|
from typing import List
|
||||||
SamProcessor,
|
|
||||||
pipeline,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
import cv2
|
|
||||||
import supervision as sv
|
|
||||||
except ImportError:
|
|
||||||
print("Please install supervision and cv")
|
|
||||||
|
|
||||||
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
|
|
||||||
class FeatureType(Enum):
|
|
||||||
"""
|
|
||||||
An enumeration to represent the types of features for mask adjustment in image
|
|
||||||
segmentation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
ISLAND = "ISLAND"
|
|
||||||
HOLE = "HOLE"
|
|
||||||
|
|
||||||
@classmethod
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
def list(cls):
|
|
||||||
return list(map(lambda c: c.value, cls))
|
|
||||||
|
|
||||||
|
|
||||||
def compute_mask_iou_vectorized(masks: np.ndarray) -> np.ndarray:
|
class SAM:
|
||||||
"""
|
"""
|
||||||
Vectorized computation of the Intersection over Union (IoU) for all pairs of masks.
|
Class representing the SAM (Segmentation and Masking) model.
|
||||||
|
|
||||||
Parameters:
|
Args:
|
||||||
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
|
model_name (str): The name of the pre-trained SAM model. Default is "facebook/sam-vit-huge".
|
||||||
number of masks, `H` is the height, and `W` is the width.
|
device (torch.device): The device to run the model on. Default is the current device.
|
||||||
|
input_points (List[List[int]]): The 2D location of a window in the image to segment. Default is [[450, 600]].
|
||||||
Returns:
|
*args: Additional positional arguments.
|
||||||
np.ndarray: A 2D numpy array of shape `(N, N)` where each element `[i, j]` is
|
**kwargs: Additional keyword arguments.
|
||||||
the IoU between masks `i` and `j`.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If any of the masks is found to be empty.
|
|
||||||
"""
|
|
||||||
if np.any(masks.sum(axis=(1, 2)) == 0):
|
|
||||||
raise ValueError(
|
|
||||||
"One or more masks are empty. Please filter out empty"
|
|
||||||
" masks before using `compute_iou_vectorized` function."
|
|
||||||
)
|
|
||||||
|
|
||||||
masks_bool = masks.astype(bool)
|
|
||||||
masks_flat = masks_bool.reshape(masks.shape[0], -1)
|
|
||||||
intersection = np.logical_and(
|
|
||||||
masks_flat[:, None], masks_flat[None, :]
|
|
||||||
).sum(axis=2)
|
|
||||||
union = np.logical_or(
|
|
||||||
masks_flat[:, None], masks_flat[None, :]
|
|
||||||
).sum(axis=2)
|
|
||||||
iou_matrix = intersection / union
|
|
||||||
return iou_matrix
|
|
||||||
|
|
||||||
|
|
||||||
def mask_non_max_suppression(
|
|
||||||
masks: np.ndarray, iou_threshold: float = 0.6
|
|
||||||
) -> np.ndarray:
|
|
||||||
"""
|
|
||||||
Performs Non-Max Suppression on a set of masks by prioritizing larger masks and
|
|
||||||
removing smaller masks that overlap significantly.
|
|
||||||
|
|
||||||
When the IoU between two masks exceeds the specified threshold, the smaller mask
|
Attributes:
|
||||||
(in terms of area) is discarded. This process is repeated for each pair of masks,
|
model_name (str): The name of the pre-trained SAM model.
|
||||||
effectively filtering out masks that are significantly overlapped by larger ones.
|
device (torch.device): The device to run the model on.
|
||||||
|
input_points (List[List[int]]): The 2D location of a window in the image to segment.
|
||||||
|
model (SamModel): The pre-trained SAM model.
|
||||||
|
processor (SamProcessor): The processor for the SAM model.
|
||||||
|
|
||||||
Parameters:
|
Methods:
|
||||||
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
|
run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and returns the segmentation scores and masks.
|
||||||
number of masks, `H` is the height, and `W` is the width.
|
process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image.
|
||||||
iou_threshold (float): The IoU threshold for determining significant overlap.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.ndarray: A 3D numpy array of filtered masks.
|
|
||||||
"""
|
"""
|
||||||
num_masks = masks.shape[0]
|
|
||||||
areas = masks.sum(axis=(1, 2))
|
|
||||||
sorted_idx = np.argsort(-areas)
|
|
||||||
keep_mask = np.ones(num_masks, dtype=bool)
|
|
||||||
iou_matrix = compute_mask_iou_vectorized(masks)
|
|
||||||
for i in range(num_masks):
|
|
||||||
if not keep_mask[sorted_idx[i]]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
overlapping_masks = iou_matrix[sorted_idx[i]] > iou_threshold
|
def __init__(
|
||||||
overlapping_masks[sorted_idx[i]] = False
|
self,
|
||||||
keep_mask[sorted_idx] = np.logical_and(
|
model_name: str = "facebook/sam-vit-huge",
|
||||||
keep_mask[sorted_idx], ~overlapping_masks
|
device=device,
|
||||||
)
|
input_points: List[List[int]] = [[450, 600]],
|
||||||
|
*args,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
self.model_name = model_name
|
||||||
|
self.device = device
|
||||||
|
self.input_points = input_points
|
||||||
|
|
||||||
return masks[keep_mask]
|
self.model = SamModel.from_pretrained(
|
||||||
|
model_name, *args, **kwargs
|
||||||
|
).to(device)
|
||||||
|
|
||||||
|
self.processor = SamProcessor.from_pretrained(model_name)
|
||||||
|
|
||||||
def filter_masks_by_relative_area(
|
def run(self, task=None, img=None, *args, **kwargs):
|
||||||
masks: np.ndarray,
|
|
||||||
minimum_area: float = 0.01,
|
|
||||||
maximum_area: float = 1.0,
|
|
||||||
) -> np.ndarray:
|
|
||||||
"""
|
"""
|
||||||
Filters masks based on their relative area within the total area of each mask.
|
Runs the SAM model on the given image and returns the segmentation scores and masks.
|
||||||
|
|
||||||
Parameters:
|
Args:
|
||||||
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
|
task: The task to perform. Not used in this method.
|
||||||
number of masks, `H` is the height, and `W` is the width.
|
img: The input image to segment.
|
||||||
minimum_area (float): The minimum relative area threshold. Must be between `0`
|
*args: Additional positional arguments.
|
||||||
and `1`.
|
**kwargs: Additional keyword arguments.
|
||||||
maximum_area (float): The maximum relative area threshold. Must be between `0`
|
|
||||||
and `1`.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: A 3D numpy array containing masks that fall within the specified
|
Tuple: A tuple containing the segmentation scores and masks.
|
||||||
relative area range.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If `minimum_area` or `maximum_area` are outside the `0` to `1`
|
|
||||||
range, or if `minimum_area` is greater than `maximum_area`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not (isinstance(masks, np.ndarray) and masks.ndim == 3):
|
|
||||||
raise ValueError("Input must be a 3D numpy array.")
|
|
||||||
|
|
||||||
if not (0 <= minimum_area <= 1) or not (0 <= maximum_area <= 1):
|
|
||||||
raise ValueError(
|
|
||||||
"`minimum_area` and `maximum_area` must be between 0"
|
|
||||||
" and 1."
|
|
||||||
)
|
|
||||||
|
|
||||||
if minimum_area > maximum_area:
|
|
||||||
raise ValueError(
|
|
||||||
"`minimum_area` must be less than or equal to"
|
|
||||||
" `maximum_area`."
|
|
||||||
)
|
|
||||||
|
|
||||||
total_area = masks.shape[1] * masks.shape[2]
|
|
||||||
relative_areas = masks.sum(axis=(1, 2)) / total_area
|
|
||||||
return masks[
|
|
||||||
(relative_areas >= minimum_area)
|
|
||||||
& (relative_areas <= maximum_area)
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def adjust_mask_features_by_relative_area(
|
|
||||||
mask: np.ndarray,
|
|
||||||
area_threshold: float,
|
|
||||||
feature_type: FeatureType = FeatureType.ISLAND,
|
|
||||||
) -> np.ndarray:
|
|
||||||
"""
|
|
||||||
Adjusts a mask by removing small islands or filling small holes based on a relative
|
|
||||||
area threshold.
|
|
||||||
|
|
||||||
!!! warning
|
|
||||||
|
|
||||||
Running this function on a mask with small islands may result in empty masks.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
mask (np.ndarray): A 2D numpy array with shape `(H, W)`, where `H` is the
|
|
||||||
height, and `W` is the width.
|
|
||||||
area_threshold (float): Threshold for relative area to remove or fill features.
|
|
||||||
feature_type (FeatureType): Type of feature to adjust (`ISLAND` for removing
|
|
||||||
islands, `HOLE` for filling holes).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.ndarray: A 2D numpy array containing mask.
|
|
||||||
"""
|
"""
|
||||||
height, width = mask.shape
|
img = self.process_img(img)
|
||||||
total_area = width * height
|
|
||||||
|
|
||||||
mask = np.uint8(mask * 255)
|
|
||||||
operation = (
|
|
||||||
cv2.RETR_EXTERNAL
|
|
||||||
if feature_type == FeatureType.ISLAND
|
|
||||||
else cv2.RETR_CCOMP
|
|
||||||
)
|
|
||||||
contours, _ = cv2.findContours(
|
|
||||||
mask, operation, cv2.CHAIN_APPROX_SIMPLE
|
|
||||||
)
|
|
||||||
|
|
||||||
for contour in contours:
|
|
||||||
area = cv2.contourArea(contour)
|
|
||||||
relative_area = area / total_area
|
|
||||||
if relative_area < area_threshold:
|
|
||||||
cv2.drawContours(
|
|
||||||
image=mask,
|
|
||||||
contours=[contour],
|
|
||||||
contourIdx=-1,
|
|
||||||
color=(
|
|
||||||
0 if feature_type == FeatureType.ISLAND else 255
|
|
||||||
),
|
|
||||||
thickness=-1,
|
|
||||||
)
|
|
||||||
return np.where(mask > 0, 1, 0).astype(bool)
|
|
||||||
|
|
||||||
|
# Specify the points of the mask to segment
|
||||||
|
input_points = [
|
||||||
|
self.input_points
|
||||||
|
] # 2D location of a window in the image
|
||||||
|
|
||||||
def masks_to_marks(masks: np.ndarray) -> sv.Detections:
|
# Preprocess the image
|
||||||
"""
|
inputs = self.processor(
|
||||||
Converts a set of masks to a marks (sv.Detections) object.
|
img, input_points=input_points, return_tensors="pt"
|
||||||
|
).to(device)
|
||||||
|
|
||||||
Parameters:
|
with torch.no_grad():
|
||||||
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
|
outputs = self.model(**inputs) # noqa: E999
|
||||||
number of masks, `H` is the height, and `W` is the width.
|
|
||||||
|
|
||||||
Returns:
|
masks = self.processor.image_processor.post_process_masks(
|
||||||
sv.Detections: An object containing the masks and their bounding box
|
outputs.pred_masks.cpu(),
|
||||||
coordinates.
|
inputs["original_sizes"].cpu(),
|
||||||
"""
|
inputs["reshaped_input_sizes"].cpu(),
|
||||||
return sv.Detections(
|
|
||||||
mask=masks, xyxy=sv.mask_to_xyxy(masks=masks)
|
|
||||||
)
|
)
|
||||||
|
scores = outputs.iou_scores
|
||||||
|
|
||||||
|
return scores, masks
|
||||||
|
|
||||||
def refine_marks(
|
def process_img(self, img: str = None, *args, **kwargs):
|
||||||
marks: sv.Detections,
|
|
||||||
maximum_hole_area: float = 0.01,
|
|
||||||
maximum_island_area: float = 0.01,
|
|
||||||
minimum_mask_area: float = 0.02,
|
|
||||||
maximum_mask_area: float = 1.0,
|
|
||||||
) -> sv.Detections:
|
|
||||||
"""
|
"""
|
||||||
Refines a set of masks by removing small islands and holes, and filtering by mask
|
Processes the input image and returns the processed image.
|
||||||
area.
|
|
||||||
|
|
||||||
Parameters:
|
Args:
|
||||||
marks (sv.Detections): An object containing the masks and their bounding box
|
img (str): The URL or file path of the input image.
|
||||||
coordinates.
|
*args: Additional positional arguments.
|
||||||
maximum_hole_area (float): The maximum relative area of holes to be filled in
|
**kwargs: Additional keyword arguments.
|
||||||
each mask.
|
|
||||||
maximum_island_area (float): The maximum relative area of islands to be removed
|
|
||||||
from each mask.
|
|
||||||
minimum_mask_area (float): The minimum relative area for a mask to be retained.
|
|
||||||
maximum_mask_area (float): The maximum relative area for a mask to be retained.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
sv.Detections: An object containing the masks and their bounding box
|
Image: The processed image.
|
||||||
coordinates.
|
|
||||||
"""
|
|
||||||
result_masks = []
|
|
||||||
for mask in marks.mask:
|
|
||||||
mask = adjust_mask_features_by_relative_area(
|
|
||||||
mask=mask,
|
|
||||||
area_threshold=maximum_island_area,
|
|
||||||
feature_type=FeatureType.ISLAND,
|
|
||||||
)
|
|
||||||
mask = adjust_mask_features_by_relative_area(
|
|
||||||
mask=mask,
|
|
||||||
area_threshold=maximum_hole_area,
|
|
||||||
feature_type=FeatureType.HOLE,
|
|
||||||
)
|
|
||||||
if np.any(mask):
|
|
||||||
result_masks.append(mask)
|
|
||||||
result_masks = np.array(result_masks)
|
|
||||||
result_masks = filter_masks_by_relative_area(
|
|
||||||
masks=result_masks,
|
|
||||||
minimum_area=minimum_mask_area,
|
|
||||||
maximum_area=maximum_mask_area,
|
|
||||||
)
|
|
||||||
return sv.Detections(
|
|
||||||
mask=result_masks, xyxy=sv.mask_to_xyxy(masks=result_masks)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SegmentAnythingMarkGenerator:
|
|
||||||
"""
|
|
||||||
A class for performing image segmentation using a specified model.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
device (str): The device to run the model on (e.g., 'cpu', 'cuda').
|
|
||||||
model_name (str): The name of the model to be loaded. Defaults to
|
|
||||||
'facebook/sam-vit-huge'.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
device: str = "cpu",
|
|
||||||
model_name: str = "facebook/sam-vit-huge",
|
|
||||||
):
|
|
||||||
self.model = SamModel.from_pretrained(model_name).to(device)
|
|
||||||
self.processor = SamProcessor.from_pretrained(model_name)
|
|
||||||
self.image_processor = SamImageProcessor.from_pretrained(
|
|
||||||
model_name
|
|
||||||
)
|
|
||||||
self.pipeline = pipeline(
|
|
||||||
task="mask-generation",
|
|
||||||
model=self.model,
|
|
||||||
image_processor=self.image_processor,
|
|
||||||
device=device,
|
|
||||||
)
|
|
||||||
|
|
||||||
def run(self, image: np.ndarray) -> sv.Detections:
|
|
||||||
"""
|
"""
|
||||||
Generate image segmentation marks.
|
raw_image = Image.open(
|
||||||
|
requests.get(img, stream=True, *args, **kwargs).raw
|
||||||
|
).convert("RGB")
|
||||||
|
|
||||||
Parameters:
|
return raw_image
|
||||||
image (np.ndarray): The image to be marked in BGR format.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
sv.Detections: An object containing the segmentation masks and their
|
|
||||||
corresponding bounding box coordinates.
|
|
||||||
"""
|
|
||||||
image = Image.fromarray(
|
|
||||||
cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
||||||
)
|
|
||||||
outputs = self.pipeline(image, points_per_batch=64)
|
|
||||||
masks = np.array(outputs["masks"])
|
|
||||||
return masks_to_marks(masks=masks)
|
|
||||||
|
Loading…
Reference in new issue