[FEAT][SimpleAgent]

pull/336/head
Kye 1 year ago
parent 36b022ed41
commit 63236dbee3

1
.gitignore vendored

@@ -18,6 +18,7 @@ venv
swarms/agents/.DS_Store
_build
conversation.txt
stderr_log.txt
.vscode

@@ -464,6 +464,7 @@ print(video_path)
- Plug-and-play conversational agent with `GPT-4`, `Mixtral`, or any of our models
- Reliable conversational structure that keeps messages together, with dynamic handling and auto-chunking for long-context conversations and interactions
- Reliable: this simple structure consistently returns the responses you need.
```python
import os
@@ -474,7 +475,9 @@ from swarms import (
Conversation,
)
conv = Conversation()
conv = Conversation(
time_enabled=True,
)
# Load the environment variables
load_dotenv()
@@ -499,7 +502,7 @@ def interactive_conversation(llm):
out = llm(task)
conv.add("assistant", out)
print(
f"Assistant: {out}", #color="cyan"
f"Assistant: {out}",
)
conv.display_conversation()
conv.export_conversation("conversation.txt")

@@ -5,10 +5,11 @@ from dotenv import load_dotenv
from swarms import (
OpenAIChat,
Conversation,
# display_markdown_message,
)
conv = Conversation()
conv = Conversation(
time_enabled=True,
)
# Load the environment variables
load_dotenv()
@@ -19,10 +20,11 @@ api_key = os.environ.get("OPENAI_API_KEY")
# Initialize the language model
llm = OpenAIChat(openai_api_key=api_key, model_name="gpt-4")
# Run the language model in a loop
def interactive_conversation(llm):
def interactive_conversation(llm, iters: int = 10):
conv = Conversation()
while True:
for i in range(iters):
user_input = input("User: ")
conv.add("user", user_input)
if user_input.lower() == "quit":
@@ -33,7 +35,7 @@ def interactive_conversation(llm):
out = llm(task)
conv.add("assistant", out)
print(
f"Assistant: {out}", #color="cyan"
f"Assistant: {out}",
)
conv.display_conversation()
conv.export_conversation("conversation.txt")

@@ -0,0 +1,39 @@
from swarms import Conversation, AbstractLLM
# Run the language model in a loop for n iterations
def SimpleAgent(
llm: AbstractLLM = None, iters: int = 10, *args, **kwargs
):
"""Simple agent conversation
Args:
llm (_type_): _description_
iters (int, optional): _description_. Defaults to 10.
"""
try:
conv = Conversation(*args, **kwargs)
for i in range(iters):
user_input = input("User: ")
conv.add("user", user_input)
if user_input.lower() == "quit":
break
task = (
conv.return_history_as_string()
) # Get the conversation history
out = llm(task)
conv.add("assistant", out)
print(
f"Assistant: {out}",
)
conv.display_conversation()
conv.export_conversation("conversation.txt")
except Exception as error:
print(f"[ERROR][SimpleAgentConversation] {error}")
raise error
except KeyboardInterrupt:
print("[INFO][SimpleAgentConversation] Keyboard interrupt")
conv.export_conversation("conversation.txt")
raise KeyboardInterrupt
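A minimal usage sketch of the new `SimpleAgent` helper (the `swarms.agents.simple_agent` import path is an assumption; any `AbstractLLM`-compatible model should work, and extra keyword arguments are forwarded to `Conversation`):

```python
import os

from dotenv import load_dotenv
from swarms import OpenAIChat
from swarms.agents.simple_agent import SimpleAgent  # import path is an assumption

# Load the OpenAI key from the environment, as in the examples above
load_dotenv()
llm = OpenAIChat(
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
    model_name="gpt-4",
)

# Interactive loop for up to 5 turns; `time_enabled` is passed through to Conversation
SimpleAgent(llm, iters=5, time_enabled=True)
```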

@@ -1,315 +1,107 @@
import cv2
import numpy as np
import torch
from PIL import Image
from transformers import (
SamImageProcessor,
SamModel,
SamProcessor,
pipeline,
)
import requests
from transformers import SamModel, SamProcessor
from typing import List
try:
import cv2
import supervision as sv
except ImportError:
print("Please install supervision and cv")
device = "cuda" if torch.cuda.is_available() else "cpu"
from enum import Enum
class FeatureType(Enum):
"""
An enumeration to represent the types of features for mask adjustment in image
segmentation.
class SAM:
"""
Class representing the SAM (Segment Anything) model.
ISLAND = "ISLAND"
HOLE = "HOLE"
Args:
model_name (str): The name of the pre-trained SAM model. Default is "facebook/sam-vit-huge".
device (torch.device): The device to run the model on. Default is the current device.
input_points (List[List[int]]): The 2D location of a window in the image to segment. Default is [[450, 600]].
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
@classmethod
def list(cls):
return list(map(lambda c: c.value, cls))
Attributes:
model_name (str): The name of the pre-trained SAM model.
device (torch.device): The device to run the model on.
input_points (List[List[int]]): The 2D location of a window in the image to segment.
model (SamModel): The pre-trained SAM model.
processor (SamProcessor): The processor for the SAM model.
Methods:
run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and returns the segmentation scores and masks.
process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image.
def compute_mask_iou_vectorized(masks: np.ndarray) -> np.ndarray:
"""
Vectorized computation of the Intersection over Union (IoU) for all pairs of masks.
Parameters:
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
number of masks, `H` is the height, and `W` is the width.
Returns:
np.ndarray: A 2D numpy array of shape `(N, N)` where each element `[i, j]` is
the IoU between masks `i` and `j`.
Raises:
ValueError: If any of the masks is found to be empty.
"""
if np.any(masks.sum(axis=(1, 2)) == 0):
raise ValueError(
"One or more masks are empty. Please filter out empty"
" masks before using `compute_iou_vectorized` function."
)
def __init__(
self,
model_name: str = "facebook/sam-vit-huge",
device=device,
input_points: List[List[int]] = [[450, 600]],
*args,
**kwargs,
):
self.model_name = model_name
self.device = device
self.input_points = input_points
masks_bool = masks.astype(bool)
masks_flat = masks_bool.reshape(masks.shape[0], -1)
intersection = np.logical_and(
masks_flat[:, None], masks_flat[None, :]
).sum(axis=2)
union = np.logical_or(
masks_flat[:, None], masks_flat[None, :]
).sum(axis=2)
iou_matrix = intersection / union
return iou_matrix
self.model = SamModel.from_pretrained(
model_name, *args, **kwargs
).to(device)
self.processor = SamProcessor.from_pretrained(model_name)
def mask_non_max_suppression(
masks: np.ndarray, iou_threshold: float = 0.6
) -> np.ndarray:
def run(self, task=None, img=None, *args, **kwargs):
"""
Performs Non-Max Suppression on a set of masks by prioritizing larger masks and
removing smaller masks that overlap significantly.
Runs the SAM model on the given image and returns the segmentation scores and masks.
When the IoU between two masks exceeds the specified threshold, the smaller mask
(in terms of area) is discarded. This process is repeated for each pair of masks,
effectively filtering out masks that are significantly overlapped by larger ones.
Parameters:
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
number of masks, `H` is the height, and `W` is the width.
iou_threshold (float): The IoU threshold for determining significant overlap.
Args:
task: The task to perform. Not used in this method.
img: The input image to segment.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
Returns:
np.ndarray: A 3D numpy array of filtered masks.
"""
num_masks = masks.shape[0]
areas = masks.sum(axis=(1, 2))
sorted_idx = np.argsort(-areas)
keep_mask = np.ones(num_masks, dtype=bool)
iou_matrix = compute_mask_iou_vectorized(masks)
for i in range(num_masks):
if not keep_mask[sorted_idx[i]]:
continue
overlapping_masks = iou_matrix[sorted_idx[i]] > iou_threshold
overlapping_masks[sorted_idx[i]] = False
keep_mask[sorted_idx] = np.logical_and(
keep_mask[sorted_idx], ~overlapping_masks
)
Tuple: A tuple containing the segmentation scores and masks.
return masks[keep_mask]
def filter_masks_by_relative_area(
masks: np.ndarray,
minimum_area: float = 0.01,
maximum_area: float = 1.0,
) -> np.ndarray:
"""
Filters masks based on their relative area within the total area of each mask.
img = self.process_img(img)
Parameters:
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
number of masks, `H` is the height, and `W` is the width.
minimum_area (float): The minimum relative area threshold. Must be between `0`
and `1`.
maximum_area (float): The maximum relative area threshold. Must be between `0`
and `1`.
# Specify the points of the mask to segment
input_points = [
self.input_points
] # 2D location of a window in the image
Returns:
np.ndarray: A 3D numpy array containing masks that fall within the specified
relative area range.
Raises:
ValueError: If `minimum_area` or `maximum_area` are outside the `0` to `1`
range, or if `minimum_area` is greater than `maximum_area`.
"""
# Preprocess the image
inputs = self.processor(
img, input_points=input_points, return_tensors="pt"
).to(device)
if not (isinstance(masks, np.ndarray) and masks.ndim == 3):
raise ValueError("Input must be a 3D numpy array.")
with torch.no_grad():
outputs = self.model(**inputs) # noqa: E999
if not (0 <= minimum_area <= 1) or not (0 <= maximum_area <= 1):
raise ValueError(
"`minimum_area` and `maximum_area` must be between 0"
" and 1."
masks = self.processor.image_processor.post_process_masks(
outputs.pred_masks.cpu(),
inputs["original_sizes"].cpu(),
inputs["reshaped_input_sizes"].cpu(),
)
scores = outputs.iou_scores
if minimum_area > maximum_area:
raise ValueError(
"`minimum_area` must be less than or equal to"
" `maximum_area`."
)
total_area = masks.shape[1] * masks.shape[2]
relative_areas = masks.sum(axis=(1, 2)) / total_area
return masks[
(relative_areas >= minimum_area)
& (relative_areas <= maximum_area)
]
return scores, masks
def adjust_mask_features_by_relative_area(
mask: np.ndarray,
area_threshold: float,
feature_type: FeatureType = FeatureType.ISLAND,
) -> np.ndarray:
def process_img(self, img: str = None, *args, **kwargs):
"""
Adjusts a mask by removing small islands or filling small holes based on a relative
area threshold.
!!! warning
Processes the input image and returns the processed image.
Running this function on a mask with small islands may result in empty masks.
Parameters:
mask (np.ndarray): A 2D numpy array with shape `(H, W)`, where `H` is the
height, and `W` is the width.
area_threshold (float): Threshold for relative area to remove or fill features.
feature_type (FeatureType): Type of feature to adjust (`ISLAND` for removing
islands, `HOLE` for filling holes).
Args:
img (str): The URL or file path of the input image.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
Returns:
np.ndarray: A 2D numpy array containing mask.
"""
height, width = mask.shape
total_area = width * height
mask = np.uint8(mask * 255)
operation = (
cv2.RETR_EXTERNAL
if feature_type == FeatureType.ISLAND
else cv2.RETR_CCOMP
)
contours, _ = cv2.findContours(
mask, operation, cv2.CHAIN_APPROX_SIMPLE
)
Image: The processed image.
for contour in contours:
area = cv2.contourArea(contour)
relative_area = area / total_area
if relative_area < area_threshold:
cv2.drawContours(
image=mask,
contours=[contour],
contourIdx=-1,
color=(
0 if feature_type == FeatureType.ISLAND else 255
),
thickness=-1,
)
return np.where(mask > 0, 1, 0).astype(bool)
def masks_to_marks(masks: np.ndarray) -> sv.Detections:
"""
Converts a set of masks to a marks (sv.Detections) object.
Parameters:
masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
number of masks, `H` is the height, and `W` is the width.
raw_image = Image.open(
requests.get(img, stream=True, *args, **kwargs).raw
).convert("RGB")
Returns:
sv.Detections: An object containing the masks and their bounding box
coordinates.
"""
return sv.Detections(
mask=masks, xyxy=sv.mask_to_xyxy(masks=masks)
)
def refine_marks(
marks: sv.Detections,
maximum_hole_area: float = 0.01,
maximum_island_area: float = 0.01,
minimum_mask_area: float = 0.02,
maximum_mask_area: float = 1.0,
) -> sv.Detections:
"""
Refines a set of masks by removing small islands and holes, and filtering by mask
area.
Parameters:
marks (sv.Detections): An object containing the masks and their bounding box
coordinates.
maximum_hole_area (float): The maximum relative area of holes to be filled in
each mask.
maximum_island_area (float): The maximum relative area of islands to be removed
from each mask.
minimum_mask_area (float): The minimum relative area for a mask to be retained.
maximum_mask_area (float): The maximum relative area for a mask to be retained.
Returns:
sv.Detections: An object containing the masks and their bounding box
coordinates.
"""
result_masks = []
for mask in marks.mask:
mask = adjust_mask_features_by_relative_area(
mask=mask,
area_threshold=maximum_island_area,
feature_type=FeatureType.ISLAND,
)
mask = adjust_mask_features_by_relative_area(
mask=mask,
area_threshold=maximum_hole_area,
feature_type=FeatureType.HOLE,
)
if np.any(mask):
result_masks.append(mask)
result_masks = np.array(result_masks)
result_masks = filter_masks_by_relative_area(
masks=result_masks,
minimum_area=minimum_mask_area,
maximum_area=maximum_mask_area,
)
return sv.Detections(
mask=result_masks, xyxy=sv.mask_to_xyxy(masks=result_masks)
)
class SegmentAnythingMarkGenerator:
"""
A class for performing image segmentation using a specified model.
Parameters:
device (str): The device to run the model on (e.g., 'cpu', 'cuda').
model_name (str): The name of the model to be loaded. Defaults to
'facebook/sam-vit-huge'.
"""
def __init__(
self,
device: str = "cpu",
model_name: str = "facebook/sam-vit-huge",
):
self.model = SamModel.from_pretrained(model_name).to(device)
self.processor = SamProcessor.from_pretrained(model_name)
self.image_processor = SamImageProcessor.from_pretrained(
model_name
)
self.pipeline = pipeline(
task="mask-generation",
model=self.model,
image_processor=self.image_processor,
device=device,
)
def run(self, image: np.ndarray) -> sv.Detections:
"""
Generate image segmentation marks.
Parameters:
image (np.ndarray): The image to be marked in BGR format.
Returns:
sv.Detections: An object containing the segmentation masks and their
corresponding bounding box coordinates.
"""
image = Image.fromarray(
cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
)
outputs = self.pipeline(image, points_per_batch=64)
masks = np.array(outputs["masks"])
return masks_to_marks(masks=masks)
return raw_image
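A minimal usage sketch of the rewritten `SAM` wrapper (the `swarms.models` import path and the image URL are assumptions; `run` downloads the image, prompts the model at `input_points`, and returns the IoU scores plus the post-processed masks):

```python
from swarms.models import SAM  # import path is an assumption

# Point-prompted segmentation: input_points is the 2D window in the image to segment
model = SAM(
    model_name="facebook/sam-vit-huge",
    input_points=[[450, 600]],
)

# The image is fetched over HTTP by process_img and converted to RGB
scores, masks = model.run(
    img="https://example.com/some_image.jpg",  # replace with a reachable image URL
)

print(scores.shape)  # IoU scores tensor for the predicted masks
print(len(masks))    # post-processed masks
```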
