[FEAT][SimpleAgent]

pull/336/head
Kye 1 year ago
parent 36b022ed41
commit 63236dbee3

.gitignore vendored

@@ -18,6 +18,7 @@ venv
 swarms/agents/.DS_Store
 _build
+conversation.txt
 stderr_log.txt
 .vscode

@@ -464,6 +464,7 @@ print(video_path)
 - Plug in and play conversational agent with `GPT4`, `Mixtral`, or any of our models
 - Reliable conversational structure to hold messages together with dynamic handling for long context conversations and interactions with auto chunking
 - Reliable, this simple system will always provide the responses you want.

 ```python
 import os
@@ -474,7 +475,9 @@ from swarms import (
     Conversation,
 )

-conv = Conversation()
+conv = Conversation(
+    time_enabled=True,
+)

 # Load the environment variables
 load_dotenv()
@@ -499,7 +502,7 @@ def interactive_conversation(llm):
         out = llm(task)
         conv.add("assistant", out)
         print(
-            f"Assistant: {out}", #color="cyan"
+            f"Assistant: {out}",
         )
     conv.display_conversation()
     conv.export_conversation("conversation.txt")

@@ -5,10 +5,11 @@ from dotenv import load_dotenv
 from swarms import (
     OpenAIChat,
     Conversation,
-    # display_markdown_message,
 )

-conv = Conversation()
+conv = Conversation(
+    time_enabled=True,
+)

 # Load the environment variables
 load_dotenv()
@@ -19,10 +20,11 @@ api_key = os.environ.get("OPENAI_API_KEY")
 # Initialize the language model
 llm = OpenAIChat(openai_api_key=api_key, model_name="gpt-4")

 # Run the language model in a loop
-def interactive_conversation(llm):
+def interactive_conversation(llm, iters: int = 10):
     conv = Conversation()
-    while True:
+    for i in range(iters):
         user_input = input("User: ")
         conv.add("user", user_input)
         if user_input.lower() == "quit":

@@ -33,10 +35,10 @@ def interactive_conversation(llm):
         out = llm(task)
         conv.add("assistant", out)
         print(
-            f"Assistant: {out}", #color="cyan"
+            f"Assistant: {out}",
         )
     conv.display_conversation()
     conv.export_conversation("conversation.txt")

 # Replace with your LLM instance
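
For reference, a minimal sketch of invoking the updated example with its new bounded loop; the call is illustrative and assumes the `llm` instance created earlier in this file:

```python
# Illustrative call: runs at most 5 turns, or stops early when the user types "quit".
interactive_conversation(llm, iters=5)
```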

@@ -0,0 +1,39 @@
from swarms import Conversation, AbstractLLM


# Run the language model in a loop for n iterations
def SimpleAgent(
    llm: AbstractLLM = None, iters: int = 10, *args, **kwargs
):
    """Simple agent conversation loop.

    Args:
        llm (AbstractLLM): The language model used to answer each turn.
        iters (int, optional): Maximum number of conversation turns. Defaults to 10.
    """
    try:
        conv = Conversation(*args, **kwargs)
        for i in range(iters):
            user_input = input("User: ")
            conv.add("user", user_input)
            if user_input.lower() == "quit":
                break
            task = (
                conv.return_history_as_string()
            )  # Get the conversation history
            out = llm(task)
            conv.add("assistant", out)
            print(
                f"Assistant: {out}",
            )
        conv.display_conversation()
        conv.export_conversation("conversation.txt")
    except Exception as error:
        print(f"[ERROR][SimpleAgentConversation] {error}")
        raise error
    except KeyboardInterrupt:
        print("[INFO][SimpleAgentConversation] Keyboard interrupt")
        conv.export_conversation("conversation.txt")
        raise KeyboardInterrupt
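
A minimal usage sketch for the new helper, assuming it is exported alongside the other conversation utilities; the import path and the final call are illustrative assumptions, not part of the diff:

```python
import os

from dotenv import load_dotenv
from swarms import OpenAIChat
from swarms.agents.simple_agent import SimpleAgent  # assumed import path

load_dotenv()

# Reuse the same model setup as the conversation examples above.
llm = OpenAIChat(
    openai_api_key=os.environ.get("OPENAI_API_KEY"), model_name="gpt-4"
)

# Extra kwargs are forwarded to Conversation, e.g. the time_enabled flag added in this commit.
SimpleAgent(llm, iters=5, time_enabled=True)
```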

@@ -1,315 +1,107 @@
-import cv2
-import numpy as np
-from PIL import Image
-from transformers import (
-    SamImageProcessor,
-    SamModel,
-    SamProcessor,
-    pipeline,
-)
-
-try:
-    import cv2
-    import supervision as sv
-except ImportError:
-    print("Please install supervision and cv")
-
-from enum import Enum
-
-
-class FeatureType(Enum):
-    """
-    An enumeration to represent the types of features for mask adjustment in image
-    segmentation.
-    """
-
-    ISLAND = "ISLAND"
-    HOLE = "HOLE"
-
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
-
-def compute_mask_iou_vectorized(masks: np.ndarray) -> np.ndarray:
-    """
-    Vectorized computation of the Intersection over Union (IoU) for all pairs of masks.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-
-    Returns:
-        np.ndarray: A 2D numpy array of shape `(N, N)` where each element `[i, j]` is
-            the IoU between masks `i` and `j`.
-
-    Raises:
-        ValueError: If any of the masks is found to be empty.
-    """
-    if np.any(masks.sum(axis=(1, 2)) == 0):
-        raise ValueError(
-            "One or more masks are empty. Please filter out empty"
-            " masks before using `compute_iou_vectorized` function."
-        )
-
-    masks_bool = masks.astype(bool)
-    masks_flat = masks_bool.reshape(masks.shape[0], -1)
-    intersection = np.logical_and(
-        masks_flat[:, None], masks_flat[None, :]
-    ).sum(axis=2)
-    union = np.logical_or(
-        masks_flat[:, None], masks_flat[None, :]
-    ).sum(axis=2)
-    iou_matrix = intersection / union
-    return iou_matrix
-
-
-def mask_non_max_suppression(
-    masks: np.ndarray, iou_threshold: float = 0.6
-) -> np.ndarray:
-    """
-    Performs Non-Max Suppression on a set of masks by prioritizing larger masks and
-    removing smaller masks that overlap significantly.
-
-    When the IoU between two masks exceeds the specified threshold, the smaller mask
-    (in terms of area) is discarded. This process is repeated for each pair of masks,
-    effectively filtering out masks that are significantly overlapped by larger ones.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-        iou_threshold (float): The IoU threshold for determining significant overlap.
-
-    Returns:
-        np.ndarray: A 3D numpy array of filtered masks.
-    """
-    num_masks = masks.shape[0]
-    areas = masks.sum(axis=(1, 2))
-    sorted_idx = np.argsort(-areas)
-    keep_mask = np.ones(num_masks, dtype=bool)
-    iou_matrix = compute_mask_iou_vectorized(masks)
-    for i in range(num_masks):
-        if not keep_mask[sorted_idx[i]]:
-            continue
-
-        overlapping_masks = iou_matrix[sorted_idx[i]] > iou_threshold
-        overlapping_masks[sorted_idx[i]] = False
-        keep_mask[sorted_idx] = np.logical_and(
-            keep_mask[sorted_idx], ~overlapping_masks
-        )
-    return masks[keep_mask]
-
-
-def filter_masks_by_relative_area(
-    masks: np.ndarray,
-    minimum_area: float = 0.01,
-    maximum_area: float = 1.0,
-) -> np.ndarray:
-    """
-    Filters masks based on their relative area within the total area of each mask.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-        minimum_area (float): The minimum relative area threshold. Must be between `0`
-            and `1`.
-        maximum_area (float): The maximum relative area threshold. Must be between `0`
-            and `1`.
-
-    Returns:
-        np.ndarray: A 3D numpy array containing masks that fall within the specified
-            relative area range.
-
-    Raises:
-        ValueError: If `minimum_area` or `maximum_area` are outside the `0` to `1`
-            range, or if `minimum_area` is greater than `maximum_area`.
-    """
-    if not (isinstance(masks, np.ndarray) and masks.ndim == 3):
-        raise ValueError("Input must be a 3D numpy array.")
-
-    if not (0 <= minimum_area <= 1) or not (0 <= maximum_area <= 1):
-        raise ValueError(
-            "`minimum_area` and `maximum_area` must be between 0"
-            " and 1."
-        )
-
-    if minimum_area > maximum_area:
-        raise ValueError(
-            "`minimum_area` must be less than or equal to"
-            " `maximum_area`."
-        )
-
-    total_area = masks.shape[1] * masks.shape[2]
-    relative_areas = masks.sum(axis=(1, 2)) / total_area
-    return masks[
-        (relative_areas >= minimum_area)
-        & (relative_areas <= maximum_area)
-    ]
-
-
-def adjust_mask_features_by_relative_area(
-    mask: np.ndarray,
-    area_threshold: float,
-    feature_type: FeatureType = FeatureType.ISLAND,
-) -> np.ndarray:
-    """
-    Adjusts a mask by removing small islands or filling small holes based on a relative
-    area threshold.
-
-    !!! warning
-
-        Running this function on a mask with small islands may result in empty masks.
-
-    Parameters:
-        mask (np.ndarray): A 2D numpy array with shape `(H, W)`, where `H` is the
-            height, and `W` is the width.
-        area_threshold (float): Threshold for relative area to remove or fill features.
-        feature_type (FeatureType): Type of feature to adjust (`ISLAND` for removing
-            islands, `HOLE` for filling holes).
-
-    Returns:
-        np.ndarray: A 2D numpy array containing mask.
-    """
-    height, width = mask.shape
-    total_area = width * height
-
-    mask = np.uint8(mask * 255)
-    operation = (
-        cv2.RETR_EXTERNAL
-        if feature_type == FeatureType.ISLAND
-        else cv2.RETR_CCOMP
-    )
-    contours, _ = cv2.findContours(
-        mask, operation, cv2.CHAIN_APPROX_SIMPLE
-    )
-
-    for contour in contours:
-        area = cv2.contourArea(contour)
-        relative_area = area / total_area
-        if relative_area < area_threshold:
-            cv2.drawContours(
-                image=mask,
-                contours=[contour],
-                contourIdx=-1,
-                color=(
-                    0 if feature_type == FeatureType.ISLAND else 255
-                ),
-                thickness=-1,
-            )
-    return np.where(mask > 0, 1, 0).astype(bool)
-
-
-def masks_to_marks(masks: np.ndarray) -> sv.Detections:
-    """
-    Converts a set of masks to a marks (sv.Detections) object.
-
-    Parameters:
-        masks (np.ndarray): A 3D numpy array with shape `(N, H, W)`, where `N` is the
-            number of masks, `H` is the height, and `W` is the width.
-
-    Returns:
-        sv.Detections: An object containing the masks and their bounding box
-            coordinates.
-    """
-    return sv.Detections(
-        mask=masks, xyxy=sv.mask_to_xyxy(masks=masks)
-    )
-
-
-def refine_marks(
-    marks: sv.Detections,
-    maximum_hole_area: float = 0.01,
-    maximum_island_area: float = 0.01,
-    minimum_mask_area: float = 0.02,
-    maximum_mask_area: float = 1.0,
-) -> sv.Detections:
-    """
-    Refines a set of masks by removing small islands and holes, and filtering by mask
-    area.
-
-    Parameters:
-        marks (sv.Detections): An object containing the masks and their bounding box
-            coordinates.
-        maximum_hole_area (float): The maximum relative area of holes to be filled in
-            each mask.
-        maximum_island_area (float): The maximum relative area of islands to be removed
-            from each mask.
-        minimum_mask_area (float): The minimum relative area for a mask to be retained.
-        maximum_mask_area (float): The maximum relative area for a mask to be retained.
-
-    Returns:
-        sv.Detections: An object containing the masks and their bounding box
-            coordinates.
-    """
-    result_masks = []
-    for mask in marks.mask:
-        mask = adjust_mask_features_by_relative_area(
-            mask=mask,
-            area_threshold=maximum_island_area,
-            feature_type=FeatureType.ISLAND,
-        )
-        mask = adjust_mask_features_by_relative_area(
-            mask=mask,
-            area_threshold=maximum_hole_area,
-            feature_type=FeatureType.HOLE,
-        )
-        if np.any(mask):
-            result_masks.append(mask)
-    result_masks = np.array(result_masks)
-    result_masks = filter_masks_by_relative_area(
-        masks=result_masks,
-        minimum_area=minimum_mask_area,
-        maximum_area=maximum_mask_area,
-    )
-    return sv.Detections(
-        mask=result_masks, xyxy=sv.mask_to_xyxy(masks=result_masks)
-    )
-
-
-class SegmentAnythingMarkGenerator:
-    """
-    A class for performing image segmentation using a specified model.
-
-    Parameters:
-        device (str): The device to run the model on (e.g., 'cpu', 'cuda').
-        model_name (str): The name of the model to be loaded. Defaults to
-            'facebook/sam-vit-huge'.
-    """
-
-    def __init__(
-        self,
-        device: str = "cpu",
-        model_name: str = "facebook/sam-vit-huge",
-    ):
-        self.model = SamModel.from_pretrained(model_name).to(device)
-        self.processor = SamProcessor.from_pretrained(model_name)
-        self.image_processor = SamImageProcessor.from_pretrained(
-            model_name
-        )
-        self.pipeline = pipeline(
-            task="mask-generation",
-            model=self.model,
-            image_processor=self.image_processor,
-            device=device,
-        )
-
-    def run(self, image: np.ndarray) -> sv.Detections:
-        """
-        Generate image segmentation marks.
-
-        Parameters:
-            image (np.ndarray): The image to be marked in BGR format.
-
-        Returns:
-            sv.Detections: An object containing the segmentation masks and their
-                corresponding bounding box coordinates.
-        """
-        image = Image.fromarray(
-            cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        )
-        outputs = self.pipeline(image, points_per_batch=64)
-        masks = np.array(outputs["masks"])
-        return masks_to_marks(masks=masks)
+import torch
+from PIL import Image
+import requests
+from transformers import SamModel, SamProcessor
+from typing import List
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+class SAM:
+    """
+    Class representing the SAM (Segmentation and Masking) model.
+
+    Args:
+        model_name (str): The name of the pre-trained SAM model. Default is "facebook/sam-vit-huge".
+        device (torch.device): The device to run the model on. Default is the current device.
+        input_points (List[List[int]]): The 2D location of a window in the image to segment. Default is [[450, 600]].
+        *args: Additional positional arguments.
+        **kwargs: Additional keyword arguments.
+
+    Attributes:
+        model_name (str): The name of the pre-trained SAM model.
+        device (torch.device): The device to run the model on.
+        input_points (List[List[int]]): The 2D location of a window in the image to segment.
+        model (SamModel): The pre-trained SAM model.
+        processor (SamProcessor): The processor for the SAM model.
+
+    Methods:
+        run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and returns the segmentation scores and masks.
+        process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image.
+    """
+
+    def __init__(
+        self,
+        model_name: str = "facebook/sam-vit-huge",
+        device=device,
+        input_points: List[List[int]] = [[450, 600]],
+        *args,
+        **kwargs,
+    ):
+        self.model_name = model_name
+        self.device = device
+        self.input_points = input_points
+
+        self.model = SamModel.from_pretrained(
+            model_name, *args, **kwargs
+        ).to(device)
+
+        self.processor = SamProcessor.from_pretrained(model_name)
+
+    def run(self, task=None, img=None, *args, **kwargs):
+        """
+        Runs the SAM model on the given image and returns the segmentation scores and masks.
+
+        Args:
+            task: The task to perform. Not used in this method.
+            img: The input image to segment.
+            *args: Additional positional arguments.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            Tuple: A tuple containing the segmentation scores and masks.
+        """
+        img = self.process_img(img)
+
+        # Specify the points of the mask to segment
+        input_points = [
+            self.input_points
+        ]  # 2D location of a window in the image
+
+        # Preprocess the image
+        inputs = self.processor(
+            img, input_points=input_points, return_tensors="pt"
+        ).to(device)
+
+        with torch.no_grad():
+            outputs = self.model(**inputs)  # noqa: E999
+
+        masks = self.processor.image_processor.post_process_masks(
+            outputs.pred_masks.cpu(),
+            inputs["original_sizes"].cpu(),
+            inputs["reshaped_input_sizes"].cpu(),
+        )
+        scores = outputs.iou_scores
+
+        return scores, masks
+
+    def process_img(self, img: str = None, *args, **kwargs):
+        """
+        Processes the input image and returns the processed image.
+
+        Args:
+            img (str): The URL or file path of the input image.
+            *args: Additional positional arguments.
+            **kwargs: Additional keyword arguments.
+
+        Returns:
+            Image: The processed image.
+        """
+        raw_image = Image.open(
+            requests.get(img, stream=True, *args, **kwargs).raw
+        ).convert("RGB")
+
+        return raw_image
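
To make the new interface concrete, a brief usage sketch of the rewritten class; the import path and image URL below are assumptions for illustration, not something stated in the diff:

```python
from swarms.models.sam import SAM  # assumed import path

# Point the model at a pixel that lies on the object of interest.
sam = SAM(
    model_name="facebook/sam-vit-huge",
    input_points=[[450, 600]],
)

# Any reachable image URL works here; this one is a placeholder.
scores, masks = sam.run(img="https://example.com/street-scene.jpg")
print(scores.shape)  # IoU score per predicted mask
print(len(masks))    # post-processed masks at the original resolution
```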
