|
|
|
@ -43,6 +43,8 @@ except ImportError:
|
|
|
|
|
"Transformers package not found. HuggingFace models will not be supported."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
from swarms.security import SwarmShieldIntegration, ShieldConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ModelType(Enum):
|
|
|
|
|
"""Enum defining supported model types."""
|
|
|
|
@ -298,35 +300,53 @@ class ModelGrid:
|
|
|
|
|
allocation_strategy: GPUAllocationStrategy = GPUAllocationStrategy.MEMORY_OPTIMIZED,
|
|
|
|
|
memory_buffer: float = 0.5, # GB buffer to leave on each GPU
|
|
|
|
|
max_cpu_models: int = 0, # Maximum models to keep on CPU if no GPU space
|
|
|
|
|
max_workers: int = 4,
|
|
|
|
|
use_multiprocessing: bool = True,
|
|
|
|
|
enable_logging: bool = True,
|
|
|
|
|
log_level: str = "INFO",
|
|
|
|
|
shield_config: Optional[ShieldConfig] = None,
|
|
|
|
|
enable_security: bool = True,
|
|
|
|
|
security_level: str = "standard",
|
|
|
|
|
):
|
|
|
|
|
"""
|
|
|
|
|
Initialize the model manager.
|
|
|
|
|
Initialize the ModelGrid.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
allocation_strategy: Strategy for allocating models to GPUs
|
|
|
|
|
memory_buffer: Memory buffer to leave on each GPU (in GB)
|
|
|
|
|
max_cpu_models: Maximum number of models to keep on CPU if no GPU space
|
|
|
|
|
max_workers: Maximum number of worker processes
|
|
|
|
|
use_multiprocessing: Whether to use multiprocessing for model execution
|
|
|
|
|
enable_logging: Whether to enable detailed logging
|
|
|
|
|
log_level: Logging level
|
|
|
|
|
shield_config (ShieldConfig, optional): Security configuration for SwarmShield integration. Defaults to None.
|
|
|
|
|
enable_security (bool, optional): Whether to enable SwarmShield security features. Defaults to True.
|
|
|
|
|
security_level (str, optional): Pre-defined security level. Options: "basic", "standard", "enhanced", "maximum". Defaults to "standard".
|
|
|
|
|
"""
|
|
|
|
|
# Set log level
|
|
|
|
|
logger.remove()
|
|
|
|
|
logger.add(sys.stderr, level=log_level)
|
|
|
|
|
logger.add(
|
|
|
|
|
"gpu_model_manager.log",
|
|
|
|
|
rotation="100 MB",
|
|
|
|
|
retention="1 week",
|
|
|
|
|
level=log_level,
|
|
|
|
|
)
|
|
|
|
|
if enable_logging:
|
|
|
|
|
logger.remove()
|
|
|
|
|
logger.add(sys.stderr, level=log_level)
|
|
|
|
|
logger.add(
|
|
|
|
|
"gpu_model_manager.log",
|
|
|
|
|
rotation="100 MB",
|
|
|
|
|
retention="1 week",
|
|
|
|
|
level=log_level,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
self.models: Dict[str, ModelMetadata] = {}
|
|
|
|
|
self.gpu_manager = GPUManager()
|
|
|
|
|
self.allocation_strategy = allocation_strategy
|
|
|
|
|
self.memory_buffer = memory_buffer
|
|
|
|
|
self.max_cpu_models = max_cpu_models
|
|
|
|
|
self.max_workers = max_workers
|
|
|
|
|
self.use_multiprocessing = use_multiprocessing
|
|
|
|
|
self.enable_logging = enable_logging
|
|
|
|
|
|
|
|
|
|
# Initialize SwarmShield integration
|
|
|
|
|
self._initialize_swarm_shield(shield_config, enable_security, security_level)
|
|
|
|
|
|
|
|
|
|
# Initialize components
|
|
|
|
|
self.gpu_manager = GPUManager()
|
|
|
|
|
self.models: Dict[str, ModelMetadata] = {}
|
|
|
|
|
|
|
|
|
|
# Initialize locks and queues for multiprocessing
|
|
|
|
|
self.manager = (
|
|
|
|
@ -338,15 +358,98 @@ class ModelGrid:
|
|
|
|
|
self.result_queues: Dict[str, Any] = (
|
|
|
|
|
self.manager.dict() if use_multiprocessing else {}
|
|
|
|
|
)
|
|
|
|
|
self.model_locks: Dict[str, Any] = {}
|
|
|
|
|
|
|
|
|
|
logger.info(
|
|
|
|
|
f"ModelGrid initialized with {len(self.gpu_manager.gpus)} GPUs"
|
|
|
|
|
)
|
|
|
|
|
logger.info(
|
|
|
|
|
f"Using allocation strategy: {allocation_strategy.value}"
|
|
|
|
|
self.model_locks: Dict[str, Any] = (
|
|
|
|
|
self.manager.dict() if use_multiprocessing else {}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if self.enable_logging:
|
|
|
|
|
logger.info(
|
|
|
|
|
f"ModelGrid initialized with {len(self.gpu_manager.gpus)} GPUs"
|
|
|
|
|
)
|
|
|
|
|
logger.info(
|
|
|
|
|
f"Using allocation strategy: {allocation_strategy.value}"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _initialize_swarm_shield(
|
|
|
|
|
self,
|
|
|
|
|
shield_config: Optional[ShieldConfig] = None,
|
|
|
|
|
enable_security: bool = True,
|
|
|
|
|
security_level: str = "standard"
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Initialize SwarmShield integration for security features."""
|
|
|
|
|
self.enable_security = enable_security
|
|
|
|
|
self.security_level = security_level
|
|
|
|
|
|
|
|
|
|
if enable_security:
|
|
|
|
|
if shield_config is None:
|
|
|
|
|
shield_config = ShieldConfig.get_security_level(security_level)
|
|
|
|
|
|
|
|
|
|
self.swarm_shield = SwarmShieldIntegration(shield_config)
|
|
|
|
|
else:
|
|
|
|
|
self.swarm_shield = None
|
|
|
|
|
|
|
|
|
|
# Security methods
|
|
|
|
|
def validate_task_with_shield(self, task: str) -> str:
|
|
|
|
|
"""Validate and sanitize task input using SwarmShield."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
return self.swarm_shield.validate_and_protect_input(task)
|
|
|
|
|
return task
|
|
|
|
|
|
|
|
|
|
def validate_agent_config_with_shield(self, agent_config: dict) -> dict:
|
|
|
|
|
"""Validate agent configuration using SwarmShield."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
return self.swarm_shield.validate_and_protect_input(str(agent_config))
|
|
|
|
|
return agent_config
|
|
|
|
|
|
|
|
|
|
def process_agent_communication_with_shield(self, message: str, agent_name: str) -> str:
|
|
|
|
|
"""Process agent communication through SwarmShield security."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
return self.swarm_shield.process_agent_communication(message, agent_name)
|
|
|
|
|
return message
|
|
|
|
|
|
|
|
|
|
def check_rate_limit_with_shield(self, agent_name: str) -> bool:
|
|
|
|
|
"""Check rate limits for an agent using SwarmShield."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
return self.swarm_shield.check_rate_limit(agent_name)
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
def add_secure_message(self, message: str, agent_name: str) -> None:
|
|
|
|
|
"""Add a message to secure conversation history."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
self.swarm_shield.add_secure_message(message, agent_name)
|
|
|
|
|
|
|
|
|
|
def get_secure_messages(self) -> List[dict]:
|
|
|
|
|
"""Get secure conversation messages."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
return self.swarm_shield.get_secure_messages()
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
def get_security_stats(self) -> dict:
|
|
|
|
|
"""Get security statistics and metrics."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
return self.swarm_shield.get_security_stats()
|
|
|
|
|
return {"security_enabled": False}
|
|
|
|
|
|
|
|
|
|
def update_shield_config(self, new_config: ShieldConfig) -> None:
|
|
|
|
|
"""Update SwarmShield configuration."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
self.swarm_shield.update_config(new_config)
|
|
|
|
|
|
|
|
|
|
def enable_security(self) -> None:
|
|
|
|
|
"""Enable SwarmShield security features."""
|
|
|
|
|
if not self.swarm_shield:
|
|
|
|
|
self._initialize_swarm_shield(enable_security=True, security_level=self.security_level)
|
|
|
|
|
|
|
|
|
|
def disable_security(self) -> None:
|
|
|
|
|
"""Disable SwarmShield security features."""
|
|
|
|
|
self.swarm_shield = None
|
|
|
|
|
self.enable_security = False
|
|
|
|
|
|
|
|
|
|
def cleanup_security(self) -> None:
|
|
|
|
|
"""Clean up SwarmShield resources."""
|
|
|
|
|
if self.swarm_shield:
|
|
|
|
|
self.swarm_shield.cleanup()
|
|
|
|
|
|
|
|
|
|
def add_model(
|
|
|
|
|
self,
|
|
|
|
|
model_name: str,
|
|
|
|
|