[FEAT][SimpleAgent]

pull/336/head
Kye 1 year ago
parent 36b022ed41
commit 63236dbee3

.gitignore

@@ -18,6 +18,7 @@ venv
 swarms/agents/.DS_Store
 _build
+conversation.txt
 stderr_log.txt
 .vscode

@@ -464,6 +464,7 @@ print(video_path)
 - Plug-and-play conversational agent with `GPT4`, `Mixtral`, or any of our models
 - Reliable conversational structure to hold messages together with dynamic handling for long context conversations and interactions with auto chunking
 - Reliable: this simple system will always provide the responses you want.

 ```python
 import os
@@ -474,7 +475,9 @@ from swarms import (
     Conversation,
 )

-conv = Conversation()
+conv = Conversation(
+    time_enabled=True,
+)

 # Load the environment variables
 load_dotenv()
@@ -499,7 +502,7 @@ def interactive_conversation(llm):
         out = llm(task)
         conv.add("assistant", out)
         print(
-            f"Assistant: {out}", #color="cyan"
+            f"Assistant: {out}",
         )
     conv.display_conversation()
     conv.export_conversation("conversation.txt")
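The only functional change in the README snippet above (and in the matching playground hunks below) is the new `time_enabled=True` flag on `Conversation`. A minimal sketch of its presumed effect follows; this is an assumption, since the flag's behavior is not shown anywhere in this diff:

```python
# Presumed effect of time_enabled=True (assumption; not demonstrated in this diff)
from swarms import Conversation

conv = Conversation(time_enabled=True)
conv.add("user", "Hello!")
# With timestamps enabled, each logged message is expected to carry a time
# marker when the history is displayed or exported to conversation.txt.
conv.display_conversation()
```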

@@ -5,10 +5,11 @@ from dotenv import load_dotenv
 from swarms import (
     OpenAIChat,
     Conversation,
-    # display_markdown_message,
 )

-conv = Conversation()
+conv = Conversation(
+    time_enabled=True,
+)

 # Load the environment variables
 load_dotenv()
@@ -19,10 +20,11 @@ api_key = os.environ.get("OPENAI_API_KEY")
 # Initialize the language model
 llm = OpenAIChat(openai_api_key=api_key, model_name="gpt-4")

 # Run the language model in a loop
-def interactive_conversation(llm):
+def interactive_conversation(llm, iters: int = 10):
     conv = Conversation()
-    while True:
+    for i in range(iters):
         user_input = input("User: ")
         conv.add("user", user_input)
         if user_input.lower() == "quit":
@@ -33,7 +35,7 @@ def interactive_conversation(llm):
         out = llm(task)
         conv.add("assistant", out)
         print(
-            f"Assistant: {out}", #color="cyan"
+            f"Assistant: {out}",
         )
     conv.display_conversation()
     conv.export_conversation("conversation.txt")

@@ -0,0 +1,39 @@
+from swarms import Conversation, AbstractLLM
+
+
+# Run the language model in a loop for n iterations
+def SimpleAgent(
+    llm: AbstractLLM = None, iters: int = 10, *args, **kwargs
+):
+    """Simple agent conversation loop.
+
+    Args:
+        llm (AbstractLLM): The language model used to generate responses.
+        iters (int, optional): Maximum number of conversation turns. Defaults to 10.
+    """
+    try:
+        conv = Conversation(*args, **kwargs)
+        for i in range(iters):
+            user_input = input("User: ")
+            conv.add("user", user_input)
+            if user_input.lower() == "quit":
+                break
+            task = (
+                conv.return_history_as_string()
+            )  # Get the conversation history
+            out = llm(task)
+            conv.add("assistant", out)
+            print(
+                f"Assistant: {out}",
+            )
+        conv.display_conversation()
+        conv.export_conversation("conversation.txt")
+    except Exception as error:
+        print(f"[ERROR][SimpleAgentConversation] {error}")
+        raise error
+    except KeyboardInterrupt:
+        print("[INFO][SimpleAgentConversation] Keyboard interrupt")
+        conv.export_conversation("conversation.txt")
+        raise KeyboardInterrupt
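For context, a minimal sketch of how the new function might be called from a script. The model setup mirrors the playground example above; the import path is hypothetical, since the diff does not show where this file lives:

```python
# Minimal usage sketch (assumes OPENAI_API_KEY is set; import path is hypothetical)
import os
from dotenv import load_dotenv
from swarms import OpenAIChat
from swarms.agents import SimpleAgent  # hypothetical import path

load_dotenv()
llm = OpenAIChat(
    openai_api_key=os.environ.get("OPENAI_API_KEY"), model_name="gpt-4"
)

# Runs an interactive loop for up to 5 turns, then writes conversation.txt
SimpleAgent(llm, iters=5)
```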

@@ -1,315 +1,107 @@
-import cv2
-import numpy as np
-from PIL import Image
-from transformers import (
-    SamImageProcessor,
-    SamModel,
-    SamProcessor,
-    pipeline,
-)
-
-try:
-    import cv2
-    import supervision as sv
-except ImportError:
-    print("Please install supervision and cv")
-
-from enum import Enum
-
-
-class FeatureType(Enum):
-    """
-    An enumeration to represent the types of features for mask adjustment in image
-    segmentation.
-    """
-
-    ISLAND = "ISLAND"
-    HOLE = "HOLE"
-
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
-
-def compute_mask_iou_vectorized(masks: np.ndarray) -> np.ndarray:
-    """
-    Vectorized computation of the Intersection over Union (IoU) for all pairs of masks.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-
-    Returns:
-        np.ndarray: A 2D numpy array of shape `(N, N)` where each element `[i, j]` is
-            the IoU between masks `i` and `j`.
-
-    Raises:
-        ValueError: If any of the masks is found to be empty.
-    """
-    if np.any(masks.sum(axis=(1, 2)) == 0):
-        raise ValueError(
-            "One or more masks are empty. Please filter out empty"
-            " masks before using `compute_iou_vectorized` function."
-        )
-
-    masks_bool = masks.astype(bool)
-    masks_flat = masks_bool.reshape(masks.shape[0], -1)
-    intersection = np.logical_and(
-        masks_flat[:, None], masks_flat[None, :]
-    ).sum(axis=2)
-    union = np.logical_or(
-        masks_flat[:, None], masks_flat[None, :]
-    ).sum(axis=2)
-    iou_matrix = intersection / union
-    return iou_matrix
-
-
-def mask_non_max_suppression(
-    masks: np.ndarray, iou_threshold: float = 0.6
-) -> np.ndarray:
-    """
-    Performs Non-Max Suppression on a set of masks by prioritizing larger masks and
-    removing smaller masks that overlap significantly.
-
-    When the IoU between two masks exceeds the specified threshold, the smaller mask
-    (in terms of area) is discarded. This process is repeated for each pair of masks,
-    effectively filtering out masks that are significantly overlapped by larger ones.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-        iou_threshold (float): The IoU threshold for determining significant overlap.
-
-    Returns:
-        np.ndarray: A 3D numpy array of filtered masks.
-    """
-    num_masks = masks.shape[0]
-    areas = masks.sum(axis=(1, 2))
-    sorted_idx = np.argsort(-areas)
-    keep_mask = np.ones(num_masks, dtype=bool)
-    iou_matrix = compute_mask_iou_vectorized(masks)
-    for i in range(num_masks):
-        if not keep_mask[sorted_idx[i]]:
-            continue
-
-        overlapping_masks = iou_matrix[sorted_idx[i]] > iou_threshold
-        overlapping_masks[sorted_idx[i]] = False
-        keep_mask[sorted_idx] = np.logical_and(
-            keep_mask[sorted_idx], ~overlapping_masks
-        )
-
-    return masks[keep_mask]
-
-
-def filter_masks_by_relative_area(
-    masks: np.ndarray,
-    minimum_area: float = 0.01,
-    maximum_area: float = 1.0,
-) -> np.ndarray:
-    """
-    Filters masks based on their relative area within the total area of each mask.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-        minimum_area (float): The minimum relative area threshold. Must be between `0`
-            and `1`.
-        maximum_area (float): The maximum relative area threshold. Must be between `0`
-            and `1`.
-
-    Returns:
-        np.ndarray: A 3D numpy array containing masks that fall within the specified
-            relative area range.
-
-    Raises:
-        ValueError: If `minimum_area` or `maximum_area` are outside the `0` to `1`
-            range, or if `minimum_area` is greater than `maximum_area`.
-    """
-    if not (isinstance(masks, np.ndarray) and masks.ndim == 3):
-        raise ValueError("Input must be a 3D numpy array.")
-
-    if not (0 <= minimum_area <= 1) or not (0 <= maximum_area <= 1):
-        raise ValueError(
-            "`minimum_area` and `maximum_area` must be between 0"
-            " and 1."
-        )
-
-    if minimum_area > maximum_area:
-        raise ValueError(
-            "`minimum_area` must be less than or equal to"
-            " `maximum_area`."
-        )
-
-    total_area = masks.shape[1] * masks.shape[2]
-    relative_areas = masks.sum(axis=(1, 2)) / total_area
-    return masks[
-        (relative_areas >= minimum_area)
-        & (relative_areas <= maximum_area)
-    ]
-
-
-def adjust_mask_features_by_relative_area(
-    mask: np.ndarray,
-    area_threshold: float,
-    feature_type: FeatureType = FeatureType.ISLAND,
-) -> np.ndarray:
-    """
-    Adjusts a mask by removing small islands or filling small holes based on a relative
-    area threshold.
-
-    !!! warning
-
-        Running this function on a mask with small islands may result in empty masks.
-
-    Parameters:
-        mask (np.ndarray): A 2D numpy array with shape `(H, W)`, where `H` is the
-            height, and `W` is the width.
-        area_threshold (float): Threshold for relative area to remove or fill features.
-        feature_type (FeatureType): Type of feature to adjust (`ISLAND` for removing
-            islands, `HOLE` for filling holes).
-
-    Returns:
-        np.ndarray: A 2D numpy array containing mask.
-    """
-    height, width = mask.shape
-    total_area = width * height
-
-    mask = np.uint8(mask * 255)
-    operation = (
-        cv2.RETR_EXTERNAL
-        if feature_type == FeatureType.ISLAND
-        else cv2.RETR_CCOMP
-    )
-    contours, _ = cv2.findContours(
-        mask, operation, cv2.CHAIN_APPROX_SIMPLE
-    )
-
-    for contour in contours:
-        area = cv2.contourArea(contour)
-        relative_area = area / total_area
-        if relative_area < area_threshold:
-            cv2.drawContours(
-                image=mask,
-                contours=[contour],
-                contourIdx=-1,
-                color=(
-                    0 if feature_type == FeatureType.ISLAND else 255
-                ),
-                thickness=-1,
-            )
-    return np.where(mask > 0, 1, 0).astype(bool)
-
-
-def masks_to_marks(masks: np.ndarray) -> sv.Detections:
-    """
-    Converts a set of masks to a marks (sv.Detections) object.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-
-    Returns:
-        sv.Detections: An object containing the masks and their bounding box
-            coordinates.
-    """
-    return sv.Detections(
-        mask=masks, xyxy=sv.mask_to_xyxy(masks=masks)
-    )
-
-
-def refine_marks(
-    marks: sv.Detections,
-    maximum_hole_area: float = 0.01,
-    maximum_island_area: float = 0.01,
-    minimum_mask_area: float = 0.02,
-    maximum_mask_area: float = 1.0,
-) -> sv.Detections:
-    """
-    Refines a set of masks by removing small islands and holes, and filtering by mask
-    area.
-
-    Parameters:
-        marks (sv.Detections): An object containing the masks and their bounding box
-            coordinates.
-        maximum_hole_area (float): The maximum relative area of holes to be filled in
-            each mask.
-        maximum_island_area (float): The maximum relative area of islands to be removed
-            from each mask.
-        minimum_mask_area (float): The minimum relative area for a mask to be retained.
-        maximum_mask_area (float): The maximum relative area for a mask to be retained.
-
-    Returns:
-        sv.Detections: An object containing the masks and their bounding box
-            coordinates.
-    """
-    result_masks = []
-    for mask in marks.mask:
-        mask = adjust_mask_features_by_relative_area(
-            mask=mask,
-            area_threshold=maximum_island_area,
-            feature_type=FeatureType.ISLAND,
-        )
-        mask = adjust_mask_features_by_relative_area(
-            mask=mask,
-            area_threshold=maximum_hole_area,
-            feature_type=FeatureType.HOLE,
-        )
-        if np.any(mask):
-            result_masks.append(mask)
-    result_masks = np.array(result_masks)
-    result_masks = filter_masks_by_relative_area(
-        masks=result_masks,
-        minimum_area=minimum_mask_area,
-        maximum_area=maximum_mask_area,
-    )
-    return sv.Detections(
-        mask=result_masks, xyxy=sv.mask_to_xyxy(masks=result_masks)
-    )
-
-
-class SegmentAnythingMarkGenerator:
-    """
-    A class for performing image segmentation using a specified model.
-
-    Parameters:
-        device (str): The device to run the model on (e.g., 'cpu', 'cuda').
-        model_name (str): The name of the model to be loaded. Defaults to
-            'facebook/sam-vit-huge'.
-    """
-
-    def __init__(
-        self,
-        device: str = "cpu",
-        model_name: str = "facebook/sam-vit-huge",
-    ):
-        self.model = SamModel.from_pretrained(model_name).to(device)
-        self.processor = SamProcessor.from_pretrained(model_name)
-        self.image_processor = SamImageProcessor.from_pretrained(
-            model_name
-        )
-        self.pipeline = pipeline(
-            task="mask-generation",
-            model=self.model,
-            image_processor=self.image_processor,
-            device=device,
-        )
-
-    def run(self, image: np.ndarray) -> sv.Detections:
-        """
-        Generate image segmentation marks.
-
-        Parameters:
-            image (np.ndarray): The image to be marked in BGR format.
-
-        Returns:
-            sv.Detections: An object containing the segmentation masks and their
-                corresponding bounding box coordinates.
-        """
-        image = Image.fromarray(
-            cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        )
-        outputs = self.pipeline(image, points_per_batch=64)
-        masks = np.array(outputs["masks"])
-        return masks_to_marks(masks=masks)
+import torch
+from PIL import Image
+import requests
+from transformers import SamModel, SamProcessor
+from typing import List
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+class SAM:
+    """
+    Class representing the SAM (Segmentation and Masking) model.
+
+    Args:
+        model_name (str): The name of the pre-trained SAM model. Default is "facebook/sam-vit-huge".
+        device (torch.device): The device to run the model on. Default is the current device.
+        input_points (List[List[int]]): The 2D location of a window in the image to segment. Default is [[450, 600]].
+        *args: Additional positional arguments.
+        **kwargs: Additional keyword arguments.
+
+    Attributes:
+        model_name (str): The name of the pre-trained SAM model.
+        device (torch.device): The device to run the model on.
+        input_points (List[List[int]]): The 2D location of a window in the image to segment.
+        model (SamModel): The pre-trained SAM model.
+        processor (SamProcessor): The processor for the SAM model.
+
+    Methods:
+        run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and returns the segmentation scores and masks.
+        process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image.
+    """
+
+    def __init__(
+        self,
+        model_name: str = "facebook/sam-vit-huge",
+        device=device,
+        input_points: List[List[int]] = [[450, 600]],
+        *args,
+        **kwargs,
+    ):
+        self.model_name = model_name
+        self.device = device
+        self.input_points = input_points
+
+        self.model = SamModel.from_pretrained(
+            model_name, *args, **kwargs
+        ).to(device)
+
+        self.processor = SamProcessor.from_pretrained(model_name)
+
+    def run(self, task=None, img=None, *args, **kwargs):
+        """
+        Runs the SAM model on the given image and returns the segmentation scores and masks.
+
+        Args:
+            task: The task to perform. Not used in this method.
+            img: The input image to segment.
+            *args: Additional positional arguments.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            Tuple: A tuple containing the segmentation scores and masks.
+        """
+        img = self.process_img(img)
+
+        # Specify the points of the mask to segment
+        input_points = [
+            self.input_points
+        ]  # 2D location of a window in the image
+
+        # Preprocess the image
+        inputs = self.processor(
+            img, input_points=input_points, return_tensors="pt"
+        ).to(device)
+
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+
+        masks = self.processor.image_processor.post_process_masks(
+            outputs.pred_masks.cpu(),
+            inputs["original_sizes"].cpu(),
+            inputs["reshaped_input_sizes"].cpu(),
+        )
+        scores = outputs.iou_scores
+
+        return scores, masks
+
+    def process_img(self, img: str = None, *args, **kwargs):
+        """
+        Processes the input image and returns the processed image.
+
+        Args:
+            img (str): The URL or file path of the input image.
+            *args: Additional positional arguments.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            Image: The processed image.
+        """
+        raw_image = Image.open(
+            requests.get(img, stream=True, *args, **kwargs).raw
+        ).convert("RGB")
+
+        return raw_image
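For reference, a minimal sketch of how the new `SAM` wrapper might be invoked. The import path and the example image URL are assumptions not shown in this diff; `run` fetches the image over HTTP, prompts SAM at the configured point, and returns IoU scores plus post-processed masks:

```python
# Minimal usage sketch (import path and image URL are illustrative assumptions)
from swarms.models.sam import SAM  # assumed location of the new class

sam = SAM(model_name="facebook/sam-vit-huge", input_points=[[450, 600]])

scores, masks = sam.run(
    img="https://example.com/sample.png"  # any publicly reachable RGB image
)
print(scores.shape, len(masks))
```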
