pull/440/head
Kye 9 months ago
parent 5203234fc2
commit fc587b3183

.gitignore vendored

@@ -24,8 +24,9 @@ swarms/agents/.DS_Store
 logs
 _build
 conversation.txt
+t1_state.json
 stderr_log.txt
+t2_state.json
 .vscode
 .DS_STORE
 # Byte-compiled / optimized / DLL files

@@ -0,0 +1,233 @@
import logging
from collections import defaultdict
from typing import Callable, Sequence

from swarms import Agent, Anthropic


# Assuming the existence of an appropriate Agent class and logger setup
class AgentRearrange:
    def __init__(
        self,
        agents: Sequence[Agent] = None,
        verbose: bool = False,
        custom_prompt: str = None,
        callbacks: Sequence[Callable] = None,
    ):
        if not agents:
            raise ValueError(
                "A non-empty sequence of agents must be provided."
            )
        if not all(isinstance(agent, Agent) for agent in agents):
            raise ValueError(
                "All elements must be instances of the Agent class."
            )
        self.agents = agents
        self.verbose = verbose
        self.custom_prompt = custom_prompt
        self.callbacks = callbacks if callbacks is not None else []
        self.flows = defaultdict(list)
    def parse_pattern(self, pattern: str):
        """
        Parse the interaction pattern to set up task flows, supporting both
        sequential and concurrent executions within the same pattern.
        """
        try:
            self.flows.clear()  # Ensure flows are reset each time the pattern is parsed
            # Split the pattern into potentially concurrent flows
            concurrent_flows = pattern.split(",")
            for flow in concurrent_flows:
                # Trim whitespace and identify sequential parts within each concurrent flow
                parts = [part.strip() for part in flow.split("->")]
                if len(parts) > 1:
                    # Link each part sequentially to the next as source -> destination
                    for i in range(len(parts) - 1):
                        source = parts[i]
                        destination = parts[i + 1]
                        # Validate and add each sequential link
                        if source not in [
                            agent.agent_name for agent in self.agents
                        ]:
                            logging.error(
                                f"Source agent {source} not found."
                            )
                            return False
                        if destination not in [
                            agent.agent_name for agent in self.agents
                        ]:
                            logging.error(
                                f"Destination agent {destination} not"
                                " found."
                            )
                            return False
                        self.flows[source].append(destination)
                else:
                    # Handle the single-agent case if needed
                    self.flows[parts[0]] = []
            return True
        except Exception as e:
            logging.error(f"Error parsing pattern: {e}")
            return False
    def self_find_agent_by_name(self, name: str):
        for agent in self.agents:
            if agent.agent_name == name:
                return agent
        return None

    def agent_exists(self, name: str):
        for agent in self.agents:
            if agent.agent_name == name:
                return True
        return False

    def parse_concurrent_flow(
        self,
        flow: str,
    ):
        sequential_agents = flow.split("->")
        for i, source_name in enumerate(sequential_agents[:-1]):
            destination_name = sequential_agents[i + 1].strip()
            self.parse_sequential_flow(
                source_name.strip(), destination_name
            )

    def parse_sequential_flow(
        self,
        source: str,
        destination: str,
    ):
        if not self.self_find_agent_by_name(
            source
        ) or not self.self_find_agent_by_name(destination):
            return False
        self.flows[source].append(destination)
        return True
    def execute_task(
        self,
        dest_agent_name: str,
        source: str,
        task: str,
        specific_tasks: dict,
    ):
        dest_agent = self.self_find_agent_by_name(dest_agent_name)
        if not dest_agent:
            return None
        task_to_run = specific_tasks.get(dest_agent_name, task)
        if self.custom_prompt:
            out = dest_agent.run(
                f"{task_to_run} {self.custom_prompt}"
            )
        else:
            out = dest_agent.run(f"{task_to_run} (from {source})")
        return out

    def process_flows(self, pattern, default_task, specific_tasks):
        if not self.parse_pattern(pattern):
            return None

        results = []
        for source, destinations in self.flows.items():
            if not destinations:
                task = specific_tasks.get(source, default_task)
                source_agent = self.self_find_agent_by_name(source)
                if source_agent:
                    result = source_agent.run(task)
                    results.append(result)
            else:
                for destination in destinations:
                    task = specific_tasks.get(
                        destination, default_task
                    )
                    destination_agent = self.self_find_agent_by_name(
                        destination
                    )
                    if destination_agent:
                        result = destination_agent.run(task)
                        results.append(result)
        return results

    def __call__(
        self,
        pattern: str = None,
        default_task: str = None,
        **specific_tasks,
    ):
        self.flows.clear()  # Reset previous flows
        results = self.process_flows(
            pattern, default_task, specific_tasks
        )
        return results
## Initialize the workflow
agent = Agent(
    agent_name="t",
    agent_description=(
        "Generate a transcript for a youtube video on what swarms"
        " are!"
    ),
    system_prompt=(
        "Generate a transcript for a youtube video on what swarms"
        " are!"
    ),
    llm=Anthropic(),
    max_loops=1,
    autosave=True,
    dashboard=False,
    streaming_on=True,
    verbose=True,
    stopping_token="<DONE>",
)

agent2 = Agent(
    agent_name="t1",
    agent_description=(
        "Generate a transcript for a youtube video on what swarms"
        " are!"
    ),
    llm=Anthropic(),
    max_loops=1,
    system_prompt="Summarize the transcript",
    autosave=True,
    dashboard=False,
    streaming_on=True,
    verbose=True,
    stopping_token="<DONE>",
)

agent3 = Agent(
    agent_name="t2",
    agent_description=(
        "Generate a transcript for a youtube video on what swarms"
        " are!"
    ),
    llm=Anthropic(),
    max_loops=1,
    system_prompt="Finalize the transcript",
    autosave=True,
    dashboard=False,
    streaming_on=True,
    verbose=True,
    stopping_token="<DONE>",
)

# Rearrange the agents
rearrange = AgentRearrange(
    agents=[agent, agent2, agent3],
    verbose=True,
    # custom_prompt="Summarize the transcript",
)

# Run the workflow on a task
results = rearrange(
    # pattern="t -> t1, t2 -> t2",
    pattern="t -> t1 -> t2",
    default_task=(
        "Generate a transcript for a YouTube video on what swarms"
        " are!"
    ),
    t="Generate a transcript for a YouTube video on what swarms are!",
    # t2="Summarize the transcript",
    # t3="Finalize the transcript",
)

# print(results)
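
For reference, a minimal sketch of the flow mapping this pattern syntax is expected to produce, assuming the three agents defined above (named t, t1, and t2): "->" chains agents sequentially, while "," separates concurrent flows that are parsed independently.

# Illustration only; reuses the agents constructed above.
rearranger = AgentRearrange(agents=[agent, agent2, agent3])

# Sequential: every "->" link becomes one source -> [destinations] entry.
rearranger.parse_pattern("t -> t1 -> t2")
# rearranger.flows == {"t": ["t1"], "t1": ["t2"]}

# Concurrent: comma-separated flows are parsed one after another, so a
# single source can fan out to several destinations.
rearranger.parse_pattern("t -> t1, t -> t2")
# rearranger.flows == {"t": ["t1", "t2"]}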

@@ -3,7 +3,6 @@ from swarms.models.base_llm import AbstractLLM  # noqa: E402
 from swarms.models.base_multimodal_model import BaseMultiModalModel
 from swarms.models.fire_function import FireFunctionCaller
 from swarms.models.fuyu import Fuyu  # noqa: E402
-from swarms.models.gemini import Gemini  # noqa: E402
 from swarms.models.gpt4_vision_api import GPT4VisionAPI  # noqa: E402
 from swarms.models.huggingface import HuggingfaceLLM  # noqa: E402
 from swarms.models.idefics import Idefics  # noqa: E402
@@ -15,7 +14,6 @@ from swarms.models.mixtral import Mixtral  # noqa: E402
 from swarms.models.mpt import MPT7B  # noqa: E402
 from swarms.models.nougat import Nougat  # noqa: E402
 from swarms.models.openai_tts import OpenAITTS  # noqa: E402
-from swarms.models.petals import Petals  # noqa: E402
 from swarms.models.popular_llms import (
     AnthropicChat as Anthropic,
 )
@@ -25,9 +23,6 @@ from swarms.models.popular_llms import (
 from swarms.models.popular_llms import (
     CohereChat as Cohere,
 )
-from swarms.models.popular_llms import (
-    MosaicMLChat as MosaicML,
-)
 from swarms.models.popular_llms import (
     OpenAIChatLLM as OpenAIChat,
 )
@@ -39,7 +34,7 @@ from swarms.models.popular_llms import (
 )
 from swarms.models.qwen import QwenVLMultiModal  # noqa: E402
-# from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
+from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
 from swarms.models.sampling_params import SamplingParams, SamplingType
 from swarms.models.together import TogetherLLM  # noqa: E402
 from swarms.models.types import (  # noqa: E402
@@ -51,6 +46,7 @@ from swarms.models.types import (  # noqa: E402
 )
 from swarms.models.vilt import Vilt  # noqa: E402
 __all__ = [
     "AbstractLLM",
     "Anthropic",
@@ -60,7 +56,6 @@ __all__ = [
     "Cohere",
     "FireFunctionCaller",
     "Fuyu",
-    "Gemini",
     "GPT4VisionAPI",
     "HuggingfaceLLM",
     "Idefics",
@@ -75,16 +70,15 @@ __all__ = [
     "OpenAI",
     "OpenAIChat",
     "OpenAITTS",
-    "Petals",
     "QwenVLMultiModal",
     "Replicate",
     "SamplingParams",
     "SamplingType",
+    "SegmentAnythingMarkGenerator",
     "TextModality",
     "TogetherLLM",
     "Vilt",
     "AudioModality",
     "ImageModality",
     "VideoModality",
-    "MosaicML",
 ]

@@ -4,7 +4,6 @@ import cv2
 import numpy as np
 import requests
 import torch
-import torchvision.transforms as T
 from PIL import Image
 from transformers import AutoModelForVision2Seq, AutoProcessor
@@ -154,7 +153,7 @@ class Kosmos(BaseMultiModalModel):
         image_tensor = (
             image_tensor * reverse_norm_std + reverse_norm_mean
         )
-        pil_img = T.ToPILImage()(image_tensor)
+        # pil_img = T.ToPILImage()(image_tensor)
         image_h = pil_img.height
         image_w = pil_img.width
         image = np.array(pil_img)[:, :, [2, 1, 0]]

@@ -1,100 +0,0 @@
import os
import supervision as sv
from tqdm import tqdm
from ultralytics import YOLO
from swarms.models.base_llm import AbstractLLM
from swarms.utils.download_weights_from_url import (
download_weights_from_url,
)
class Odin(AbstractLLM):
"""
Odin class represents an object detection and tracking model.
Attributes:
source_weights_path (str): The file path to the YOLO model weights.
confidence_threshold (float): The confidence threshold for object detection.
iou_threshold (float): The intersection over union (IOU) threshold for object detection.
Example:
>>> odin = Odin(
... source_weights_path="yolo.weights",
... confidence_threshold=0.3,
... iou_threshold=0.7,
... )
>>> odin.run(video="input.mp4")
"""
def __init__(
self,
source_weights_path: str = "yolo.weights",
confidence_threshold: float = 0.3,
iou_threshold: float = 0.7,
):
super().__init__()
self.source_weights_path = source_weights_path
self.confidence_threshold = confidence_threshold
self.iou_threshold = iou_threshold
if not os.path.exists(self.source_weights_path):
download_weights_from_url(
url=source_weights_path,
save_path=self.source_weights_path,
)
def run(self, video: str, *args, **kwargs):
"""
Runs the object detection and tracking algorithm on the specified video.
Args:
video (str): The path to the input video file.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
Returns:
bool: True if the video was processed successfully, False otherwise.
"""
model = YOLO(self.source_weights_path)
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
frame_generator = sv.get_video_frames_generator(
source_path=self.source_video
)
video_info = sv.VideoInfo.from_video(video=video)
with sv.VideoSink(
target_path=self.target_video, video_info=video_info
) as sink:
for frame in tqdm(
frame_generator, total=video_info.total_frames
):
results = model(
frame,
verbose=True,
conf=self.confidence_threshold,
iou=self.iou_threshold,
)[0]
detections = sv.Detections.from_ultranalytics(results)
detections = tracker.update_with_detections(
detections
)
labels = [
f"#{tracker_id} {model.model.names[class_id]}"
for _, _, _, class_id, tracker_id in detections
]
annotated_frame = box_annotator.annotate(
scene=frame.copy(),
detections=detections,
labels=labels,
)
result = sink.write_frame(frame=annotated_frame)
return result

@@ -1,46 +0,0 @@
from transformers import AutoModelForCausalLM, AutoTokenizer
from swarms.models.base_llm import AbstractLLM
class Petals(AbstractLLM):
"""Petals Bloom models."""
def __init__(
self,
model_name="bigscience/bloom-petals",
temperature=0.7,
max_new_tokens=256,
top_p=0.9,
top_k=None,
do_sample=True,
max_length=None,
):
self.model_name = model_name
self.temperature = temperature
self.max_new_tokens = max_new_tokens
self.top_p = top_p
self.top_k = top_k
self.do_sample = do_sample
self.max_length = max_length
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForCausalLM.from_pretrained(model_name)
def _default_params(self):
"""Get the default parameters for calling Petals API."""
return {
"temperature": self.temperature,
"max_new_tokens": self.max_new_tokens,
"top_p": self.top_p,
"top_k": self.top_k,
"do_sample": self.do_sample,
"max_length": self.max_length,
}
def __call__(self, prompt):
"""Generate text using the Petals API."""
params = self._default_params()
inputs = self.tokenizer(prompt, return_tensors="pt")[
"input_ids"
]
outputs = self.model.generate(inputs, **params)
return self.tokenizer.decode(outputs[0])

@@ -1,64 +0,0 @@
from typing import Union
from roboflow import Roboflow
from swarms.models.base_multimodal_model import BaseMultiModalModel
class RoboflowMultiModal(BaseMultiModalModel):
"""
Initializes the RoboflowModel with the given API key, project ID, and version.
Args:
api_key (str): The API key for Roboflow.
project_id (str): The ID of the project.
version (str): The version of the model.
confidence (int, optional): The confidence threshold. Defaults to 50.
overlap (int, optional): The overlap threshold. Defaults to 25.
"""
def __init__(
self,
api_key: str,
project_id: str,
version: str,
confidence: int = 50,
overlap: int = 25,
hosted: bool = False,
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.api_key = api_key
self.project_id = project_id
self.verison = version
self.confidence = confidence
self.overlap = overlap
self.hosted = hosted
try:
rf = Roboflow(api_key=api_key, *args, **kwargs)
project = rf.workspace().project(project_id)
self.model = project.version(version).model
self.model.confidence = confidence
self.model.overlap = overlap
except Exception as e:
print(f"Error initializing RoboflowModel: {str(e)}")
def __call__(self, img: Union[str, bytes]):
"""
Runs inference on an image and retrieves predictions.
Args:
img (Union[str, bytes]): The path to the image or the URL of the image.
hosted (bool, optional): Whether the image is hosted. Defaults to False.
Returns:
Optional[roboflow.Prediction]: The prediction or None if an error occurs.
"""
try:
prediction = self.model.predict(img, hosted=self.hosted)
return prediction
except Exception as e:
print(f"Error running inference: {str(e)}")
return None

@@ -1,117 +0,0 @@
from typing import Optional, Callable
import cv2
import numpy as np
import supervision as sv
from PIL import Image
from transformers import (
SamImageProcessor,
SamModel,
SamProcessor,
pipeline,
)
from swarms.models.base_multimodal_model import BaseMultiModalModel
class SegmentAnythingMarkGenerator(BaseMultiModalModel):
"""
A class for performing image segmentation using a specified model.
Parameters:
device (str): The device to run the model on (e.g., 'cpu', 'cuda').
model_name (str): The name of the model to be loaded. Defaults to
'facebook/sam-vit-huge'.
"""
def __init__(
self,
device: str = "cpu",
model_name: str = "facebook/sam-vit-huge",
visualize_marks: bool = False,
masks_to_marks: Callable = sv.masks_to_marks,
*args,
**kwargs,
):
super(SegmentAnythingMarkGenerator).__init__(*args, **kwargs)
self.device = device
self.model_name = model_name
self.visualize_marks = visualize_marks
self.masks_to_marks = masks_to_marks
self.model = SamModel.from_pretrained(
model_name, *args, **kwargs
).to(device)
self.processor = SamProcessor.from_pretrained(model_name)
self.image_processor = SamImageProcessor.from_pretrained(
model_name
)
self.device = device
self.pipeline = pipeline(
task="mask-generation",
model=self.model,
image_processor=self.image_processor,
device=self.device,
)
def __call__(
self, image: np.ndarray, mask: Optional[np.ndarray] = None
) -> sv.Detections:
"""
Generate image segmentation marks.
Parameters:
image (np.ndarray): The image to be marked in BGR format.
mask: (Optional[np.ndarray]): The mask to be used as a guide for
segmentation.
Returns:
sv.Detections: An object containing the segmentation masks and their
corresponding bounding box coordinates.
"""
image = Image.fromarray(
cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
)
if mask is None:
outputs = self.pipeline(image, points_per_batch=64)
masks = np.array(outputs["masks"])
return self.masks_to_marks(masks=masks)
else:
inputs = self.processor(image, return_tensors="pt").to(
self.device
)
image_embeddings = self.model.get_image_embeddings(
inputs.pixel_values
)
masks = []
for polygon in sv.mask_to_polygons(mask.astype(bool)):
indexes = np.random.choice(
a=polygon.shape[0], size=5, replace=True
)
input_points = polygon[indexes]
inputs = self.processor(
images=image,
input_points=[[input_points]],
return_tensors="pt",
).to(self.device)
del inputs["pixel_values"]
outputs = self.model(
image_embeddings=image_embeddings, **inputs
)
mask = (
self.processor.image_processor.post_process_masks(
masks=outputs.pred_masks.cpu().detach(),
original_sizes=inputs["original_sizes"]
.cpu()
.detach(),
reshaped_input_sizes=inputs[
"reshaped_input_sizes"
]
.cpu()
.detach(),
)[0][0][0].numpy()
)
masks.append(mask)
masks = np.array(masks)
return self.masks_to_marks(masks=masks)
# def visualize_img(self):

@@ -158,7 +158,7 @@ class Agent:
         template: Optional[str] = None,
         max_loops: Optional[int] = 1,
         stopping_condition: Optional[Callable[[str], bool]] = None,
-        loop_interval: int = 1,
+        loop_interval: int = 0,
         retry_attempts: int = 3,
         retry_interval: int = 1,
         return_history: bool = False,

@@ -129,11 +129,11 @@ class AgentRearrange:
                     )
                 else:
                     dest_agent.run(f"{task} (from {source})")
-            else:
-                raise ValueError(
-                    "No agents provided. Please provide agents to"
-                    " execute the task."
-                )
+            # else:
+            #     raise ValueError(
+            #         "No agents provided. Please provide agents to"
+            #         " execute the task."
+            #     )
         except Exception as e:
             logger.error(
                 f"Error: {e} try again by providing agents and"
@@ -145,9 +145,9 @@ class AgentRearrange:
 # # Example usage
 # try:
 #     agents = [
-#         Agent(name=f"b{i}") for i in range(1, 4)
+#         Agent(agent_name=f"b{i}") for i in range(1, 4)
 #     ]  # Creating agents b1, b2, b3
-#     agents.append(Agent(name="d"))  # Adding agent d
+#     agents.append(Agent(agent_name="d"))  # Adding agent d
 #     rearranger = Rearrange(agents)
 #     # Specifying a complex pattern for task execution

@@ -1,14 +1,14 @@
 from typing import List
 from swarms.structs.agent import Agent
-from swarms.structs.base_multiagent_structure import (
-    BaseMultiAgentStructure,
-)
+from swarms.structs.base_swarm import (
+    BaseSwarm,
+)
 from swarms.structs.conversation import Conversation
 from swarms.utils.logger import logger
-class StackOverflowSwarm(BaseMultiAgentStructure):
+class StackOverflowSwarm(BaseSwarm):
     """
     Represents a swarm of agents that work together to solve a problem or answer a question on Stack Overflow.

@@ -1,65 +0,0 @@
from typing import Any, List, Union
from pydantic import BaseModel
from swarms.tools.tool import BaseTool
from swarms.utils.loguru_logger import logger
class OmniTool(BaseModel):
"""
A class representing an OmniTool.
Attributes:
tools (Union[List[BaseTool], List[BaseModel], List[Any]]): A list of tools.
verbose (bool): A flag indicating whether to enable verbose mode.
Methods:
transform_models_to_tools(): Transforms models to tools.
__call__(*args, **kwargs): Calls the tools.
"""
tools: Union[List[BaseTool], List[BaseModel], List[Any]]
verbose: bool = False
def transform_models_to_tools(self):
"""
Transforms models to tools.
"""
for i, tool in enumerate(self.tools):
if isinstance(tool, BaseModel):
tool_json = tool.model_dump_json()
# Assuming BaseTool has a method to load from json
self.tools[i] = BaseTool.load_from_json(tool_json)
def __call__(self, *args, **kwargs):
"""
Calls the tools.
Args:
*args: Variable length argument list.
**kwargs: Arbitrary keyword arguments.
Returns:
Tuple: A tuple containing the arguments and keyword arguments.
"""
try:
self.transform_models_to_tools()
logger.info(f"Number of tools: {len(self.tools)}")
try:
for tool in self.tools:
logger.info(f"Running tool: {tool}")
tool(*args, **kwargs)
except Exception as e:
logger.error(
f"Error occurred while running tools: {e}"
)
return args, kwargs
except Exception as error:
logger.error(
f"Error occurred while running tools: {error}"
)
return args, kwargs