You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
swarms/swarms/structs/swarm_load_balancer.py

337 lines
11 KiB

import random
from threading import Lock
from time import sleep
from typing import Callable, List, Optional
from swarms.structs.agent import Agent
from swarms.structs.base_swarm import BaseSwarm
from swarms.utils.loguru_logger import logger
class AgentLoadBalancer(BaseSwarm):
    """
    A load balancer class that distributes tasks among a group of agents.

    Each task is handed to a randomly chosen idle agent. While an agent is
    running a task it is marked busy so that it is not handed a second task.
    A failed attempt is retried (possibly on a different agent) up to
    ``max_retries`` attempts in total.

    Args:
        agents (List[Agent]): The list of agents available for task execution.
        max_retries (int, optional): The maximum number of attempts for a task if it fails. Defaults to 3.
        max_loops (int, optional): The maximum number of loops to run a task. Defaults to 5.
        cooldown_time (float, optional): The cooldown time (seconds) between retries. Defaults to 0.

    Attributes:
        agents (List[Agent]): The list of agents available for task execution.
        agent_status (Dict[str, bool]): Maps ``agent_name`` to True when the agent is idle, False when busy.
        max_retries (int): The maximum number of attempts for a task if it fails.
        max_loops (int): The number of times ``run_task_with_loops`` repeats a task.
        agent_performance (Dict[str, Dict[str, int]]): Per-agent ``success_count`` / ``failure_count``.
        lock (Lock): Guards ``agent_status`` and ``agent_performance``.
        cooldown_time (float): The cooldown time between retries.

    Methods:
        get_available_agent: Get an available agent for task execution.
        set_agent_status: Set the status of an agent.
        update_performance: Update the performance statistics of an agent.
        log_performance: Log the performance statistics of all agents.
        run: Run a single task using an available agent.
        run_multiple_tasks: Run multiple tasks using available agents.
        run_task_with_loops: Run a task multiple times using an available agent.
        run_task_with_callback: Run a task with a callback function.
        run_task_with_timeout: Run a task with a timeout.
    """

    def __init__(
        self,
        agents: List[Agent],
        max_retries: int = 3,
        max_loops: int = 5,
        cooldown_time: float = 0,
    ):
        # NOTE(review): BaseSwarm.__init__ is not invoked here -- confirm
        # that this is intentional.
        self.agents = agents
        # Both tables are keyed by agent_name, so agents sharing a name
        # also share one status/performance entry.
        self.agent_status = {agent.agent_name: True for agent in agents}
        self.max_retries = max_retries
        self.max_loops = max_loops
        self.agent_performance = {
            agent.agent_name: {"success_count": 0, "failure_count": 0}
            for agent in agents
        }
        self.lock = Lock()
        self.cooldown_time = cooldown_time
        self.swarm_initialization()

    def swarm_initialization(self):
        """
        Validate the configured agents and log their names.

        Raises:
            AssertionError: If no agents were provided, or if any entry is
                not an ``Agent`` instance.
        """
        logger.info(
            "Initializing AgentLoadBalancer with the following agents:"
        )

        # Raised explicitly (rather than via ``assert``) so the checks still
        # run under ``python -O``; the exception type is kept for callers.
        if not self.agents:
            raise AssertionError("No agents provided to the Load Balancer")

        for agent in self.agents:
            if not isinstance(agent, Agent):
                raise AssertionError("All agents should be of type Agent")

        for agent in self.agents:
            logger.info(f"Agent Name: {agent.agent_name}")

        logger.info("Load Balancer Initialized Successfully!")

    def _idle_agents(self) -> List[Agent]:
        """Return (and log) the agents currently marked idle. Caller must hold ``self.lock``."""
        idle = [
            agent
            for agent in self.agents
            if self.agent_status[agent.agent_name]
        ]
        logger.info(
            f"Available agents: {[agent.agent_name for agent in idle]}"
        )
        return idle

    def _acquire_agent(self) -> Optional[Agent]:
        """
        Pick a random idle agent and mark it busy in one critical section.

        Selecting and marking under a single lock hold prevents two
        concurrent callers from being handed the same agent.

        Returns:
            Optional[Agent]: The reserved agent, or None if every agent is busy.
        """
        with self.lock:
            idle = self._idle_agents()
            if not idle:
                return None
            chosen = random.choice(idle)
            self.agent_status[chosen.agent_name] = False
            return chosen

    def get_available_agent(self) -> Optional[Agent]:
        """
        Get an available agent for task execution.

        The returned agent is NOT marked busy by this call.

        Returns:
            Optional[Agent]: An available agent, or None if no agents are available.
        """
        with self.lock:
            idle = self._idle_agents()
            if not idle:
                return None
            return random.choice(idle)

    def set_agent_status(self, agent: Agent, status: bool) -> None:
        """
        Set the status of an agent.

        Args:
            agent (Agent): The agent whose status needs to be set.
            status (bool): True to mark the agent idle, False to mark it busy.
        """
        with self.lock:
            self.agent_status[agent.agent_name] = status

    def update_performance(self, agent: Agent, success: bool) -> None:
        """
        Update the performance statistics of an agent.

        Args:
            agent (Agent): The agent whose performance statistics need to be updated.
            success (bool): Whether the task executed by the agent was successful or not.
        """
        counter = "success_count" if success else "failure_count"
        with self.lock:
            self.agent_performance[agent.agent_name][counter] += 1

    def log_performance(self) -> None:
        """
        Log the performance statistics of all agents.
        """
        logger.info("Agent Performance:")
        for agent_name, stats in self.agent_performance.items():
            logger.info(f"{agent_name}: {stats}")

    def run(self, task: str, *args, **kwargs) -> str:
        """
        Run a single task using an available agent.

        At least one attempt is always made, even if ``max_retries`` is
        zero or negative. After a failed attempt that will be retried, the
        method sleeps for ``cooldown_time`` seconds.

        Args:
            task (str): The task to be executed.
            *args: Extra positional arguments forwarded to ``agent.run``.
            **kwargs: Extra keyword arguments forwarded to ``agent.run``.

        Returns:
            str: The output of the task execution.

        Raises:
            RuntimeError: If no available agents are found to handle the
                request, or if every attempt fails. The underlying error is
                attached as ``__cause__``.
        """
        # Guarantee one attempt so the method never falls through and
        # returns None when max_retries <= 0.
        attempt_limit = max(1, self.max_retries)
        failures = 0
        try:
            while failures < attempt_limit:
                agent = self._acquire_agent()
                if agent is None:
                    raise RuntimeError(
                        "No available agents to handle the request."
                    )
                try:
                    output = agent.run(task, *args, **kwargs)
                except Exception as e:
                    logger.error(
                        f"Error with agent {agent.agent_name}: {e}"
                    )
                    self.update_performance(agent, False)
                    failures += 1
                    if failures >= attempt_limit:
                        # Out of attempts: re-raise with the original
                        # traceback and skip the pointless cooldown.
                        raise
                    sleep(self.cooldown_time)
                else:
                    self.update_performance(agent, True)
                    return output
                finally:
                    # Always hand the agent back, whatever the outcome.
                    self.set_agent_status(agent, True)
        except Exception as e:
            logger.error(
                f"Task failed: {e} try again by optimizing the code."
            )
            raise RuntimeError(f"Task failed: {e}") from e

    def run_multiple_tasks(self, tasks: List[str]) -> List[str]:
        """
        Run multiple tasks using available agents.

        Tasks are run one after another; the first failure propagates and
        stops the remaining tasks.

        Args:
            tasks (List[str]): The list of tasks to be executed.

        Returns:
            List[str]: The list of outputs corresponding to each task execution.
        """
        return [self.run(task) for task in tasks]

    def run_task_with_loops(self, task: str) -> List[str]:
        """
        Run a task multiple times using an available agent.

        The task is executed ``max_loops`` times; each execution may be
        served by a different agent.

        Args:
            task (str): The task to be executed.

        Returns:
            List[str]: The list of outputs corresponding to each task execution.
        """
        return [self.run(task) for _ in range(self.max_loops)]

    def run_task_with_callback(
        self, task: str, callback: Callable[[str], None]
    ) -> None:
        """
        Run a task with a callback function.

        On success the callback receives the task output; if the task fails
        it receives the error message instead. The callback is invoked
        exactly once, and an exception raised by the callback itself
        propagates to the caller.

        Args:
            task (str): The task to be executed.
            callback (Callable[[str], None]): The callback function to be called with the task result.
        """
        try:
            result = self.run(task)
        except Exception as e:
            logger.error(f"Task failed: {e}")
            callback(str(e))
        else:
            # Kept outside the try so a failing callback is not mistaken
            # for a failed task and invoked a second time.
            callback(result)

    def run_task_with_timeout(self, task: str, timeout: float) -> str:
        """
        Run a task with a timeout.

        The task runs on a separate thread. On timeout that thread is not
        cancelled and keeps running in the background; it is a daemon
        thread so it cannot keep the interpreter alive at exit.

        Args:
            task (str): The task to be executed.
            timeout (float): The maximum time (in seconds) to wait for the task to complete.

        Returns:
            str: The output of the task execution.

        Raises:
            TimeoutError: If the task execution exceeds the specified timeout.
            Exception: If the task execution raises an exception.
        """
        import threading

        # Mutable holder shared with the worker thread.
        outcome = {}

        def target():
            try:
                outcome["result"] = self.run(task)
            except Exception as e:
                outcome["error"] = e

        worker = threading.Thread(target=target, daemon=True)
        worker.start()
        worker.join(timeout)

        if worker.is_alive():
            raise TimeoutError(f"Task timed out after {timeout} seconds.")
        if "error" in outcome:
            raise outcome["error"]
        return outcome.get("result")
# if __name__ == "__main__":
# from swarms import llama3Hosted
# # User initializes the agents
# agents = [
# Agent(
# agent_name="Transcript Generator 1",
# agent_description="Generate a transcript for a youtube video on what swarms are!",
# llm=llama3Hosted(),
# max_loops="auto",
# autosave=True,
# dashboard=False,
# verbose=True,
# stopping_token="<DONE>",
# interactive=True,
# state_save_file_type="json",
# saved_state_path="transcript_generator_1.json",
# ),
# Agent(
# agent_name="Transcript Generator 2",
# agent_description="Generate a transcript for a youtube video on what swarms are!",
# llm=llama3Hosted(),
# max_loops="auto",
# autosave=True,
# dashboard=False,
# verbose=True,
# stopping_token="<DONE>",
# interactive=True,
# state_save_file_type="json",
# saved_state_path="transcript_generator_2.json",
# )
# # Add more agents as needed
# ]
# load_balancer = AgentLoadBalancer(agents)
# try:
# result = load_balancer.run("Generate a transcript for a youtube video on what swarms are!")
# print(result)
# # Running multiple tasks
# tasks = [
# "Generate a transcript for a youtube video on what swarms are!",
# "Generate a transcript for a youtube video on AI advancements!"
# ]
# results = load_balancer.run_multiple_tasks(tasks)
# for res in results:
# print(res)
# # Running task with loops
# loop_results = load_balancer.run_task_with_loops("Generate a transcript for a youtube video on what swarms are!")
# for res in loop_results:
# print(res)
# except RuntimeError as e:
# print(f"Error: {e}")
# # Log performance
# load_balancer.log_performance()