Merge pull request #1000 from filip-michalsky/add-stagehand

add stagehand example
pull/1011/head
Kye Gomez 4 weeks ago committed by GitHub
commit c0c9b7201a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,265 @@
"""
Stagehand Browser Automation Agent for Swarms
=============================================
This example demonstrates how to create a Swarms-compatible agent
that wraps Stagehand's browser automation capabilities.
The StagehandAgent class inherits from the Swarms Agent base class
and implements browser automation through natural language commands.
"""
import asyncio
import json
import os
from typing import Any, Dict, Optional
from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel, Field
from swarms import Agent as SwarmsAgent
from stagehand import Stagehand, StagehandConfig
load_dotenv()
class WebData(BaseModel):
    """Schema for extracted web data.

    Gives extraction tasks a predictable structure for page content.
    """

    # URL the data was extracted from.
    url: str = Field(..., description="The URL of the page")
    # Page title as rendered in the browser.
    title: str = Field(..., description="Page title")
    # Free-form extracted content (text, serialized JSON, etc.).
    content: str = Field(..., description="Extracted content")
    # Extra key/value details; defaults to an empty dict.
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )
class StagehandAgent(SwarmsAgent):
    """
    A Swarms agent that integrates Stagehand for browser automation.

    This agent can navigate websites, extract data, perform actions,
    and observe page elements using natural language instructions.
    """

    def __init__(
        self,
        agent_name: str = "StagehandBrowserAgent",
        browserbase_api_key: Optional[str] = None,
        browserbase_project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
        env: str = "LOCAL",  # LOCAL or BROWSERBASE
        *args,
        **kwargs,
    ):
        """
        Initialize the StagehandAgent.

        Args:
            agent_name: Name of the agent.
            browserbase_api_key: API key for Browserbase (if using cloud).
            browserbase_project_id: Project ID for Browserbase.
            model_name: LLM model to use.
            model_api_key: API key for the model.
            env: Environment - LOCAL or BROWSERBASE.
        """
        # Don't pass stagehand-specific args to parent.
        super().__init__(agent_name=agent_name, *args, **kwargs)
        # Credentials fall back to environment variables when not given.
        self.stagehand_config = StagehandConfig(
            env=env,
            api_key=browserbase_api_key
            or os.getenv("BROWSERBASE_API_KEY"),
            project_id=browserbase_project_id
            or os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name=model_name,
            model_api_key=model_api_key
            or os.getenv("OPENAI_API_KEY"),
        )
        self.stagehand = None
        self._initialized = False

    async def _init_stagehand(self):
        """Initialize the Stagehand instance (idempotent)."""
        if not self._initialized:
            self.stagehand = Stagehand(self.stagehand_config)
            await self.stagehand.init()
            self._initialized = True
            logger.info(
                f"Stagehand initialized for {self.agent_name}"
            )

    async def _close_stagehand(self):
        """Close the Stagehand instance if it is open."""
        if self.stagehand and self._initialized:
            await self.stagehand.close()
            self._initialized = False
            logger.info(f"Stagehand closed for {self.agent_name}")

    def run(self, task: str, *args, **kwargs) -> str:
        """
        Execute a browser automation task.

        The task string should contain instructions like:
        - "Navigate to example.com and extract the main content"
        - "Go to google.com and search for 'AI agents'"
        - "Extract all company names from https://ycombinator.com"

        Args:
            task: Natural language description of the browser task.

        Returns:
            String result of the task execution (JSON on success,
            an error message on failure).
        """
        return asyncio.run(self._async_run(task, *args, **kwargs))

    async def _async_run(self, task: str, *args, **kwargs) -> str:
        """Async implementation of run()."""
        try:
            await self._init_stagehand()
            result = await self._execute_browser_task(task)
            return json.dumps(result, indent=2)
        except Exception as e:
            logger.error(f"Error in browser task: {str(e)}")
            return f"Error executing browser task: {str(e)}"
        finally:
            # Deliberately keep the browser open for follow-up tasks;
            # callers release it via cleanup().
            pass

    async def _execute_browser_task(
        self, task: str
    ) -> Dict[str, Any]:
        """
        Execute a browser task based on natural language instructions.

        Interprets the task text and dispatches to the appropriate
        Stagehand page method (goto/extract/act/observe).
        """
        import re

        page = self.stagehand.page
        result = {"task": task, "status": "completed", "data": {}}
        task_lower = task.lower()

        # Navigation: prefer an explicit URL, then fall back to bare
        # domain names mentioned in the task.
        if any(
            keyword in task_lower
            for keyword in ["navigate", "go to", "visit", "open"]
        ):
            urls = re.findall(r"https?://[^\s]+", task)
            if not urls and any(
                domain in task for domain in [".com", ".org", ".net"]
            ):
                domains = re.findall(r"(\w+\.\w+)", task)
                if domains:
                    urls = [f"https://{domain}" for domain in domains]
            if urls:
                url = urls[0]
                await page.goto(url)
                result["data"]["navigated_to"] = url
                logger.info(f"Navigated to {url}")

        # Keyword-based action dispatch.
        if "extract" in task_lower:
            # Bug fix: strip the verb case-insensitively — the previous
            # str.replace only removed the lowercase form, so "Extract ..."
            # tasks kept the verb in the prompt.
            extraction_prompt = re.sub(
                "extract", "", task, flags=re.IGNORECASE
            ).strip()
            extracted = await page.extract(extraction_prompt)
            result["data"]["extracted"] = extracted
            result["action"] = "extract"
        elif "click" in task_lower or "press" in task_lower:
            action_result = await page.act(task)
            result["data"]["action_performed"] = str(action_result)
            result["action"] = "act"
        elif "search" in task_lower:
            # Bug fix: split case-insensitively — "Search for X" previously
            # failed to isolate the query because split() is case-sensitive.
            search_query = (
                re.split("search for", task, flags=re.IGNORECASE)[-1]
                .strip()
                .strip("'\"")
            )
            # Locate the search box before interacting with it.
            search_box = await page.observe(
                "find the search input field"
            )
            if search_box:
                await page.act(f"click on {search_box[0]}")
                await page.act(f"type '{search_query}'")
                await page.act("press Enter")
            result["data"]["search_query"] = search_query
            result["action"] = "search"
        elif "observe" in task_lower or "find" in task_lower:
            observation = await page.observe(task)
            result["data"]["observation"] = [
                {
                    "description": obs.description,
                    "selector": obs.selector,
                }
                for obs in observation
            ]
            result["action"] = "observe"
        else:
            # No recognized keyword: hand the whole task to act().
            action_result = await page.act(task)
            result["data"]["action_result"] = str(action_result)
            result["action"] = "general"
        return result

    def cleanup(self):
        """Clean up browser resources (safe to call multiple times)."""
        if self._initialized:
            try:
                asyncio.run(self._close_stagehand())
            except RuntimeError:
                # asyncio.run() raises if an event loop is already running
                # (e.g. cleanup called from async code); log and move on
                # rather than crash the caller.
                logger.warning(
                    "Could not close Stagehand synchronously; an event "
                    "loop is already running."
                )

    def __del__(self):
        """Best-effort close of the browser on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            # Never raise from __del__ — during interpreter shutdown the
            # modules cleanup() needs may already be torn down.
            pass
# Example usage
if __name__ == "__main__":
    # Create a Stagehand browser agent. LOCAL drives a Playwright browser
    # on this machine; BROWSERBASE would use the cloud service.
    browser_agent = StagehandAgent(
        agent_name="WebScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",  # Use LOCAL for Playwright, BROWSERBASE for cloud
    )

    # Example 1: Navigate and extract data
    print("Example 1: Basic navigation and extraction")
    result1 = browser_agent.run(
        "Navigate to https://news.ycombinator.com and extract the titles of the top 5 stories"
    )
    print(result1)
    print("\n" + "=" * 50 + "\n")

    # Example 2: Perform a search
    print("Example 2: Search on a website")
    result2 = browser_agent.run(
        "Go to google.com and search for 'Swarms AI framework'"
    )
    print(result2)
    print("\n" + "=" * 50 + "\n")

    # Example 3: Extract structured data
    print("Example 3: Extract specific information")
    result3 = browser_agent.run(
        "Navigate to https://example.com and extract the main heading and first paragraph"
    )
    print(result3)

    # Clean up — the agent keeps the browser open between run() calls.
    browser_agent.cleanup()

@ -0,0 +1,397 @@
"""
Stagehand Tools for Swarms Agent
=================================
This example demonstrates how to create Stagehand browser automation tools
that can be used by a standard Swarms Agent. Each Stagehand method (act,
extract, observe) becomes a separate tool that the agent can use.
This approach gives the agent more fine-grained control over browser
automation tasks.
"""
import asyncio
import json
import os
from typing import Optional
from dotenv import load_dotenv
from loguru import logger
from swarms import Agent
from stagehand import Stagehand, StagehandConfig
load_dotenv()
class BrowserState:
    """Singleton to manage browser state across tools.

    Every tool function in this module funnels through one shared
    instance, so all calls reuse a single Stagehand browser session.
    """

    _instance = None
    _stagehand = None
    _initialized = False

    def __new__(cls):
        # Classic singleton: the first construction wins, all later
        # calls return the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def init_browser(
        self,
        env: str = "LOCAL",
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
    ):
        """Initialize the browser if not already initialized."""
        if self._initialized:
            return
        # Credentials fall back to environment variables when absent.
        stagehand_config = StagehandConfig(
            env=env,
            api_key=api_key or os.getenv("BROWSERBASE_API_KEY"),
            project_id=project_id
            or os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name=model_name,
            model_api_key=model_api_key
            or os.getenv("OPENAI_API_KEY"),
        )
        self._stagehand = Stagehand(stagehand_config)
        await self._stagehand.init()
        self._initialized = True
        logger.info("Stagehand browser initialized")

    async def get_page(self):
        """Get the current page instance."""
        if not self._initialized:
            raise RuntimeError(
                "Browser not initialized. Call init_browser first."
            )
        return self._stagehand.page

    async def close(self):
        """Close the browser."""
        if self._initialized and self._stagehand:
            await self._stagehand.close()
            self._initialized = False
            logger.info("Stagehand browser closed")


# Browser state instance shared by every tool function below.
browser_state = BrowserState()
def navigate_browser(url: str) -> str:
    """
    Navigate to a URL in the browser.

    Args:
        url (str): The URL to open. If no protocol is given,
            "https://" is prepended automatically.

    Returns:
        str: A success message naming the URL that was opened, or an
            error message describing why navigation failed.

    Example:
        >>> navigate_browser("google.com")
        "Successfully navigated to https://google.com"
    """
    return asyncio.run(_navigate_browser_async(url))


async def _navigate_browser_async(url: str) -> str:
    """Async implementation of navigate_browser."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        # Default to HTTPS when the caller omitted the scheme.
        if not url.startswith(("http://", "https://")):
            url = f"https://{url}"
        await page.goto(url)
    except Exception as e:
        logger.error(f"Navigation error: {str(e)}")
        return f"Failed to navigate to {url}: {str(e)}"
    return f"Successfully navigated to {url}"
def browser_act(action: str) -> str:
    """
    Perform an action on the current web page using natural language.

    Args:
        action (str): Natural language description of the action, e.g.
            'click the submit button', 'type hello@example.com in the
            email field', 'scroll down', 'press Enter'.

    Returns:
        str: A message describing the action performed and its result,
            or an error message if the action could not be completed.
    """
    return asyncio.run(_browser_act_async(action))


async def _browser_act_async(action: str) -> str:
    """Async implementation of browser_act."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        outcome = await page.act(action)
        return f"Action performed: {action}. Result: {outcome}"
    except Exception as e:
        logger.error(f"Action error: {str(e)}")
        return f"Failed to perform action '{action}': {str(e)}"
def browser_extract(query: str) -> str:
    """
    Extract information from the current web page using natural language.

    Args:
        query (str): Natural language description of what to extract,
            e.g. 'extract all email addresses', 'get the main article
            text', 'find all product prices'.

    Returns:
        str: JSON-formatted extraction result (for dict/list results),
            the stringified result otherwise, or an error message.
    """
    return asyncio.run(_browser_extract_async(query))


async def _browser_extract_async(query: str) -> str:
    """Async implementation of browser_extract."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        data = await page.extract(query)
        # Containers are serialized to JSON for agent consumption;
        # anything else falls back to str().
        return (
            json.dumps(data, indent=2)
            if isinstance(data, (dict, list))
            else str(data)
        )
    except Exception as e:
        logger.error(f"Extraction error: {str(e)}")
        return f"Failed to extract '{query}': {str(e)}"
def browser_observe(query: str) -> str:
    """
    Observe and find elements on the current web page using natural language.

    Args:
        query (str): Natural language description of the elements to
            locate, e.g. 'find the search box', 'locate the submit
            button', 'observe form elements'.

    Returns:
        str: JSON list of matches with their description, selector,
            and suggested interaction method, or an error message.
    """
    return asyncio.run(_browser_observe_async(query))


async def _browser_observe_async(query: str) -> str:
    """Async implementation of browser_observe."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        observations = await page.observe(query)
        # Flatten observation objects into plain dicts so the result
        # serializes cleanly to JSON.
        summaries = [
            {
                "description": obs.description,
                "selector": obs.selector,
                "method": obs.method,
            }
            for obs in observations
        ]
        return json.dumps(summaries, indent=2)
    except Exception as e:
        logger.error(f"Observation error: {str(e)}")
        return f"Failed to observe '{query}': {str(e)}"
def browser_screenshot(filename: str = "screenshot.png") -> str:
    """
    Take a screenshot of the current web page.

    Args:
        filename (str, optional): File to save the screenshot to.
            Defaults to "screenshot.png". A ".png" extension is added
            automatically if missing.

    Returns:
        str: Success message naming the saved file, or an error
            message if the screenshot fails.

    Example:
        >>> browser_screenshot("page_capture")
        "Screenshot saved to page_capture.png"
    """
    return asyncio.run(_browser_screenshot_async(filename))


async def _browser_screenshot_async(filename: str) -> str:
    """Async implementation of browser_screenshot."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        # Ensure .png extension
        if not filename.endswith(".png"):
            filename += ".png"
        # Stagehand wraps Playwright; screenshots go through the
        # underlying Playwright page object.
        playwright_page = page.page
        await playwright_page.screenshot(path=filename)
        # Bug fix: the success message previously returned the literal
        # text "(unknown)" instead of interpolating the filename.
        return f"Screenshot saved to {filename}"
    except Exception as e:
        logger.error(f"Screenshot error: {str(e)}")
        return f"Failed to take screenshot: {str(e)}"
def close_browser() -> str:
    """
    Close the browser when done with automation tasks.

    Returns:
        str: "Browser closed successfully" on success, otherwise an
            error message describing the failure.
    """
    return asyncio.run(_close_browser_async())


async def _close_browser_async() -> str:
    """Async implementation of close_browser."""
    try:
        await browser_state.close()
    except Exception as e:
        logger.error(f"Close browser error: {str(e)}")
        return f"Failed to close browser: {str(e)}"
    return "Browser closed successfully"
# Example usage
if __name__ == "__main__":
    # Create a Swarms agent wired up with the browser tools above.
    # The agent picks which tool to call based on the task text.
    browser_agent = Agent(
        agent_name="BrowserAutomationAgent",
        model_name="gpt-4o-mini",
        max_loops=1,
        tools=[
            navigate_browser,
            browser_act,
            browser_extract,
            browser_observe,
            browser_screenshot,
            close_browser,
        ],
        system_prompt="""You are a web browser automation specialist. You can:
1. Navigate to websites using the navigate_browser tool
2. Perform actions like clicking and typing using the browser_act tool
3. Extract information from pages using the browser_extract tool
4. Find and observe elements using the browser_observe tool
5. Take screenshots using the browser_screenshot tool
6. Close the browser when done using the close_browser tool
Always start by navigating to a URL before trying to interact with a page.
Be specific in your actions and extractions. When done with tasks, close the browser.""",
    )

    # Example 1: Research task
    print("Example 1: Automated web research")
    result1 = browser_agent.run(
        "Go to hackernews (news.ycombinator.com) and extract the titles of the top 5 stories. Then take a screenshot."
    )
    print(result1)
    print("\n" + "=" * 50 + "\n")

    # Example 2: Search task
    print("Example 2: Perform a web search")
    result2 = browser_agent.run(
        "Navigate to google.com, search for 'Python web scraping best practices', and extract the first 3 search result titles"
    )
    print(result2)
    print("\n" + "=" * 50 + "\n")

    # Example 3: Form interaction
    print("Example 3: Interact with a form")
    result3 = browser_agent.run(
        "Go to example.com and observe what elements are on the page. Then extract all the text content."
    )
    print(result3)

    # Clean up — asks the agent to invoke the close_browser tool.
    browser_agent.run("Close the browser")

@ -0,0 +1,263 @@
"""
Stagehand MCP Server Integration with Swarms
============================================
This example demonstrates how to use the Stagehand MCP (Model Context Protocol)
server with Swarms agents. The MCP server provides browser automation capabilities
as standardized tools that can be discovered and used by agents.
Prerequisites:
1. Install and run the Stagehand MCP server:
cd stagehand-mcp-server
npm install
npm run build
npm start
2. The server will start on http://localhost:3000/sse
Features:
- Automatic tool discovery from MCP server
- Multi-session browser management
- Built-in screenshot resources
- Prompt templates for common tasks
"""
from typing import List
from dotenv import load_dotenv
from loguru import logger
from swarms import Agent
load_dotenv()
class StagehandMCPAgent:
    """
    A Swarms agent that connects to the Stagehand MCP server
    for browser automation capabilities.

    Tools are not declared locally: the underlying Agent discovers
    them from the MCP server via the mcp_url connection.
    """

    def __init__(
        self,
        agent_name: str = "StagehandMCPAgent",
        mcp_server_url: str = "http://localhost:3000/sse",
        model_name: str = "gpt-4o-mini",
        max_loops: int = 1,
    ):
        """
        Initialize the Stagehand MCP Agent.

        Args:
            agent_name: Name of the agent
            mcp_server_url: URL of the Stagehand MCP server (SSE endpoint)
            model_name: LLM model to use
            max_loops: Maximum number of reasoning loops
        """
        self.agent = Agent(
            agent_name=agent_name,
            model_name=model_name,
            max_loops=max_loops,
            # Connect to the Stagehand MCP server
            mcp_url=mcp_server_url,
            system_prompt="""You are a web browser automation specialist with access to Stagehand MCP tools.
Available tools from the MCP server:
- navigate: Navigate to a URL
- act: Perform actions on web pages (click, type, etc.)
- extract: Extract data from web pages
- observe: Find and observe elements on pages
- screenshot: Take screenshots
- createSession: Create new browser sessions for parallel tasks
- listSessions: List active browser sessions
- closeSession: Close browser sessions
For multi-page workflows, you can create multiple sessions.
Always be specific in your actions and extractions.
Remember to close sessions when done with them.""",
            verbose=True,
        )

    def run(self, task: str) -> str:
        """Run a browser automation task and return the agent's reply."""
        return self.agent.run(task)
class MultiSessionBrowserSwarm:
    """
    A multi-agent swarm that uses multiple browser sessions
    for parallel web automation tasks.

    Each agent gets a specialized role and is instructed to create
    its own MCP browser session so agents do not interfere.
    """

    def __init__(
        self,
        mcp_server_url: str = "http://localhost:3000/sse",
        num_agents: int = 3,
    ):
        """
        Initialize a swarm of browser automation agents.

        Args:
            mcp_server_url: URL of the Stagehand MCP server
            num_agents: Number of agents to create (capped at the
                number of predefined roles, currently 3)
        """
        self.agents = []
        # Create specialized agents for different tasks
        agent_roles = [
            (
                "DataExtractor",
                "You specialize in extracting structured data from websites.",
            ),
            (
                "FormFiller",
                "You specialize in filling out forms and interacting with web applications.",
            ),
            (
                "WebMonitor",
                "You specialize in monitoring websites for changes and capturing screenshots.",
            ),
        ]
        # min() caps the agent count at the available role definitions.
        for i in range(min(num_agents, len(agent_roles))):
            name, specialization = agent_roles[i]
            agent = Agent(
                agent_name=f"{name}_{i}",
                model_name="gpt-4o-mini",
                max_loops=1,
                mcp_url=mcp_server_url,
                system_prompt=f"""You are a web browser automation specialist. {specialization}
You have access to Stagehand MCP tools including:
- createSession: Create a new browser session
- navigate_session: Navigate to URLs in a specific session
- act_session: Perform actions in a specific session
- extract_session: Extract data from a specific session
- observe_session: Observe elements in a specific session
- closeSession: Close a session when done
Always create your own session for tasks to work independently from other agents.""",
                verbose=True,
            )
            self.agents.append(agent)

    def distribute_tasks(self, tasks: List[str]) -> List[str]:
        """Distribute tasks round-robin among agents and collect results.

        Note: tasks run sequentially in this loop — parallelism comes
        from each agent using its own browser session, not threads.
        """
        results = []
        # Distribute tasks round-robin among agents
        for i, task in enumerate(tasks):
            agent_idx = i % len(self.agents)
            agent = self.agents[agent_idx]
            logger.info(
                f"Assigning task to {agent.agent_name}: {task}"
            )
            result = agent.run(task)
            results.append(result)
        return results
# Example usage
if __name__ == "__main__":
    print("=" * 70)
    print("Stagehand MCP Server Integration Examples")
    print("=" * 70)
    # All examples require the MCP server to be running locally.
    print(
        "\nMake sure the Stagehand MCP server is running on http://localhost:3000/sse"
    )
    print("Run: cd stagehand-mcp-server && npm start\n")

    # Example 1: Single agent with MCP tools
    print("\nExample 1: Single Agent with MCP Tools")
    print("-" * 40)
    mcp_agent = StagehandMCPAgent(
        agent_name="WebResearchAgent",
        mcp_server_url="http://localhost:3000/sse",
    )
    # Research task using MCP tools
    result1 = mcp_agent.run(
        """Navigate to news.ycombinator.com and extract the following:
1. The titles of the top 5 stories
2. Their points/scores
3. Number of comments for each
Then take a screenshot of the page."""
    )
    print(f"Result: {result1}")
    print("\n" + "=" * 70 + "\n")

    # Example 2: Multi-session parallel browsing
    print("Example 2: Multi-Session Parallel Browsing")
    print("-" * 40)
    parallel_agent = StagehandMCPAgent(
        agent_name="ParallelBrowserAgent",
        mcp_server_url="http://localhost:3000/sse",
    )
    result2 = parallel_agent.run(
        """Create 3 browser sessions and perform these tasks in parallel:
1. Session 1: Go to github.com/trending and extract the top 3 trending repositories
2. Session 2: Go to reddit.com/r/programming and extract the top 3 posts
3. Session 3: Go to stackoverflow.com and extract the featured questions
After extracting data from all sessions, close them."""
    )
    print(f"Result: {result2}")
    print("\n" + "=" * 70 + "\n")

    # Example 3: Multi-agent browser swarm
    print("Example 3: Multi-Agent Browser Swarm")
    print("-" * 40)
    # Create a swarm of specialized browser agents
    browser_swarm = MultiSessionBrowserSwarm(
        mcp_server_url="http://localhost:3000/sse",
        num_agents=3,
    )
    # Define tasks for the swarm — one per specialized agent.
    swarm_tasks = [
        "Create a session, navigate to python.org, and extract information about the latest Python version and its key features",
        "Create a session, go to npmjs.com, search for 'stagehand', and extract information about the package including version and description",
        "Create a session, visit playwright.dev, and extract the main features and benefits listed on the homepage",
    ]
    print("Distributing tasks to browser swarm...")
    swarm_results = browser_swarm.distribute_tasks(swarm_tasks)
    for i, result in enumerate(swarm_results):
        print(f"\nTask {i+1} Result: {result}")
    print("\n" + "=" * 70 + "\n")

    # Example 4: Complex workflow with session management
    print("Example 4: Complex Multi-Page Workflow")
    print("-" * 40)
    workflow_agent = StagehandMCPAgent(
        agent_name="WorkflowAgent",
        mcp_server_url="http://localhost:3000/sse",
        max_loops=2,  # Allow more complex reasoning
    )
    result4 = workflow_agent.run(
        """Perform a comprehensive analysis of AI frameworks:
1. Create a new session
2. Navigate to github.com/huggingface/transformers and extract the star count and latest release info
3. In the same session, navigate to github.com/openai/gpt-3 and extract similar information
4. Navigate to github.com/anthropics/anthropic-sdk-python and extract repository statistics
5. Take screenshots of each repository page
6. Compile a comparison report of all three repositories
7. Close the session when done"""
    )
    print(f"Result: {result4}")
    print("\n" + "=" * 70)
    print("All examples completed!")
    print("=" * 70)

@ -0,0 +1,371 @@
"""
Stagehand Multi-Agent Browser Automation Workflows
=================================================
This example demonstrates advanced multi-agent workflows using Stagehand
for complex browser automation scenarios. It shows how multiple agents
can work together to accomplish sophisticated web tasks.
Use cases:
1. E-commerce price monitoring across multiple sites
2. Competitive analysis and market research
3. Automated testing and validation workflows
4. Data aggregation from multiple sources
"""
from datetime import datetime
from typing import Dict, List, Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow
from swarms.structs.agent_rearrange import AgentRearrange
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
load_dotenv()
# Pydantic models for structured data
class ProductInfo(BaseModel):
    """Product information schema."""

    # Product display name.
    name: str = Field(..., description="Product name")
    # Listed price. NOTE(review): currency/units are not normalized
    # anywhere visible here — confirm with producers.
    price: float = Field(..., description="Product price")
    # Free-text availability (e.g. "in stock") — not an enum.
    availability: str = Field(..., description="Availability status")
    # Direct link to the product page.
    url: str = Field(..., description="Product URL")
    # Optional path to a saved screenshot of the listing.
    screenshot_path: Optional[str] = Field(
        None, description="Screenshot file path"
    )
class MarketAnalysis(BaseModel):
    """Market analysis report schema."""

    # When the analysis was produced. NOTE(review): datetime.now gives
    # naive local time — confirm whether UTC is expected downstream.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Products included in this analysis run.
    products: List[ProductInfo] = Field(
        ..., description="List of products analyzed"
    )
    # Presumably keyed "min"/"max" per the description — verify
    # against whatever populates this model.
    price_range: Dict[str, float] = Field(
        ..., description="Min and max prices"
    )
    # Human-readable takeaways from the analysis.
    recommendations: List[str] = Field(
        ..., description="Analysis recommendations"
    )
# Specialized browser agents
class ProductScraperAgent(StagehandAgent):
    """Specialized agent for scraping product information."""

    def __init__(self, site_name: str, *args, **kwargs):
        # Embed the target site in the agent name for easier tracing;
        # all other configuration passes through to StagehandAgent.
        super().__init__(
            agent_name=f"ProductScraper_{site_name}", *args, **kwargs
        )
        self.site_name = site_name
class PriceMonitorAgent(StagehandAgent):
    """Specialized agent for monitoring price changes."""

    def __init__(self, *args, **kwargs):
        # Fixed agent name; everything else passes through to the
        # StagehandAgent base class.
        super().__init__(
            agent_name="PriceMonitorAgent", *args, **kwargs
        )
# Example 1: E-commerce Price Comparison Workflow
def create_price_comparison_workflow():
    """
    Create a workflow that compares prices across multiple e-commerce sites.

    Returns:
        SequentialWorkflow: concurrent scraping (Amazon + eBay)
        followed by a price-analysis step.
    """
    # Create specialized agents for different sites
    amazon_agent = StagehandAgent(
        agent_name="AmazonScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    ebay_agent = StagehandAgent(
        agent_name="EbayScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    analysis_agent = Agent(
        agent_name="PriceAnalysisAgent",
        model_name="gpt-4o-mini",
        system_prompt="""You are a price analysis expert. Analyze product prices from multiple sources
and provide insights on the best deals, price trends, and recommendations.
Focus on value for money and highlight any significant price differences.""",
    )
    # Create concurrent workflow for parallel scraping
    scraping_workflow = ConcurrentWorkflow(
        agents=[amazon_agent, ebay_agent],
        max_loops=1,
        verbose=True,
    )
    # Create sequential workflow: scrape -> analyze.
    # NOTE(review): this nests a ConcurrentWorkflow inside a
    # SequentialWorkflow's agents list — confirm the installed swarms
    # version accepts workflow objects in place of agents.
    full_workflow = SequentialWorkflow(
        agents=[scraping_workflow, analysis_agent],
        max_loops=1,
        verbose=True,
    )
    return full_workflow
# Example 2: Competitive Analysis Workflow
def create_competitive_analysis_workflow():
    """
    Create a workflow for competitive analysis across multiple company websites.

    Returns:
        AgentRearrange: research -> social media -> report pipeline.
    """
    # Agent for extracting company information
    company_researcher = StagehandAgent(
        agent_name="CompanyResearchAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for analyzing social media presence
    social_media_agent = StagehandAgent(
        agent_name="SocialMediaAnalysisAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for compiling competitive analysis report
    report_compiler = Agent(
        agent_name="CompetitiveAnalysisReporter",
        model_name="gpt-4o-mini",
        system_prompt="""You are a competitive analysis expert. Compile comprehensive reports
based on company information and social media presence data. Identify strengths,
weaknesses, and market positioning for each company.""",
    )
    # Bug fix: AgentRearrange resolves flow steps by each agent's
    # agent_name, so the flow string must use those names — the
    # previous pattern used the Python variable names
    # ("company_researcher -> ..."), which match no agent.
    workflow_pattern = (
        "CompanyResearchAgent -> SocialMediaAnalysisAgent"
        " -> CompetitiveAnalysisReporter"
    )
    competitive_workflow = AgentRearrange(
        agents=[
            company_researcher,
            social_media_agent,
            report_compiler,
        ],
        flow=workflow_pattern,
        verbose=True,
    )
    return competitive_workflow
# Example 3: Automated Testing Workflow
def create_automated_testing_workflow():
    """
    Create a workflow for automated web application testing.

    Returns:
        SequentialWorkflow: concurrent UI/form/accessibility testing
        followed by a report-compilation step.
    """
    # Agent for UI testing
    ui_tester = StagehandAgent(
        agent_name="UITestingAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for form validation testing
    form_tester = StagehandAgent(
        agent_name="FormValidationAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for accessibility testing
    accessibility_tester = StagehandAgent(
        agent_name="AccessibilityTestingAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for compiling test results
    test_reporter = Agent(
        agent_name="TestReportCompiler",
        model_name="gpt-4o-mini",
        system_prompt="""You are a QA test report specialist. Compile test results from
UI, form validation, and accessibility testing into a comprehensive report.
Highlight any failures, warnings, and provide recommendations for fixes.""",
    )
    # Concurrent testing followed by report generation
    testing_workflow = ConcurrentWorkflow(
        agents=[ui_tester, form_tester, accessibility_tester],
        max_loops=1,
        verbose=True,
    )
    full_test_workflow = SequentialWorkflow(
        agents=[testing_workflow, test_reporter],
        max_loops=1,
        verbose=True,
    )
    return full_test_workflow
# Example 4: News Aggregation and Sentiment Analysis
def create_news_aggregation_workflow():
    """
    Create a workflow for news aggregation and sentiment analysis.

    Returns:
        SequentialWorkflow: parallel scraping of the news sites, then
        sentiment analysis, then trend identification.
    """
    # Multiple news scraper agents, one per site.
    news_scrapers = []
    news_sites = [
        ("TechCrunch", "https://techcrunch.com"),
        ("HackerNews", "https://news.ycombinator.com"),
        ("Reddit", "https://reddit.com/r/technology"),
    ]
    # NOTE: the URLs in news_sites are documentation only — the scraper
    # agents are not configured with them here, so the task given to the
    # workflow must name the sites/URLs to visit. (The unused unpacked
    # variable is marked with a leading underscore accordingly.)
    for site_name, _url in news_sites:
        scraper = StagehandAgent(
            agent_name=f"{site_name}Scraper",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        news_scrapers.append(scraper)
    # Sentiment analysis agent
    sentiment_analyzer = Agent(
        agent_name="SentimentAnalyzer",
        model_name="gpt-4o-mini",
        system_prompt="""You are a sentiment analysis expert. Analyze news articles and posts
to determine overall sentiment (positive, negative, neutral) and identify key themes
and trends in the technology sector.""",
    )
    # Trend identification agent
    trend_identifier = Agent(
        agent_name="TrendIdentifier",
        model_name="gpt-4o-mini",
        system_prompt="""You are a trend analysis expert. Based on aggregated news and sentiment
data, identify emerging trends, hot topics, and potential market movements in the
technology sector.""",
    )
    # Workflow: parallel scraping -> sentiment analysis -> trend identification
    scraping_workflow = ConcurrentWorkflow(
        agents=news_scrapers,
        max_loops=1,
        verbose=True,
    )
    analysis_workflow = SequentialWorkflow(
        agents=[
            scraping_workflow,
            sentiment_analyzer,
            trend_identifier,
        ],
        max_loops=1,
        verbose=True,
    )
    return analysis_workflow
# Main execution examples
if __name__ == "__main__":

    def _cleanup_workflow_agents(workflow) -> None:
        """Recursively close browser instances held by StagehandAgents.

        Workflows can nest (e.g. a ConcurrentWorkflow inside a
        SequentialWorkflow), so descend into anything exposing ``.agents``.
        """
        for agent in getattr(workflow, "agents", []):
            if isinstance(agent, StagehandAgent):
                agent.cleanup()
            elif hasattr(agent, "agents"):  # nested workflow
                _cleanup_workflow_agents(agent)

    print("=" * 70)
    print("Stagehand Multi-Agent Workflow Examples")
    print("=" * 70)

    # BUGFIX: previously only price_workflow's agents were cleaned up, which
    # leaked the browser instances of the other three workflows, and no
    # cleanup ran at all if an example raised. Every workflow is registered
    # here and released in the finally-block below.
    created_workflows = []

    try:
        # Example 1: Price Comparison
        print("\nExample 1: E-commerce Price Comparison")
        print("-" * 40)
        price_workflow = create_price_comparison_workflow()
        created_workflows.append(price_workflow)

        # Search for a specific product across multiple sites
        price_result = price_workflow.run(
            """Search for 'iPhone 15 Pro Max 256GB' on:
    1. Amazon - extract price, availability, and seller information
    2. eBay - extract price range, number of listings, and average price
    Take screenshots of search results from both sites.
    Compare the prices and provide recommendations on where to buy."""
        )
        print(f"Price Comparison Result:\n{price_result}")
        print("\n" + "=" * 70 + "\n")

        # Example 2: Competitive Analysis
        print("Example 2: Competitive Analysis")
        print("-" * 40)
        competitive_workflow = create_competitive_analysis_workflow()
        created_workflows.append(competitive_workflow)

        competitive_result = competitive_workflow.run(
            """Analyze these three AI companies:
    1. OpenAI - visit openai.com and extract mission, products, and recent announcements
    2. Anthropic - visit anthropic.com and extract their AI safety approach and products
    3. DeepMind - visit deepmind.com and extract research focus and achievements
    Then check their Twitter/X presence and recent posts.
    Compile a competitive analysis report comparing their market positioning."""
        )
        print(f"Competitive Analysis Result:\n{competitive_result}")
        print("\n" + "=" * 70 + "\n")

        # Example 3: Automated Testing
        print("Example 3: Automated Web Testing")
        print("-" * 40)
        testing_workflow = create_automated_testing_workflow()
        created_workflows.append(testing_workflow)

        test_result = testing_workflow.run(
            """Test the website example.com:
    1. UI Testing: Check if all main navigation links work, images load, and layout is responsive
    2. Form Testing: If there are any forms, test with valid and invalid inputs
    3. Accessibility: Check for alt texts, ARIA labels, and keyboard navigation
    Take screenshots of any issues found and compile a comprehensive test report."""
        )
        print(f"Test Results:\n{test_result}")
        print("\n" + "=" * 70 + "\n")

        # Example 4: News Aggregation
        print("Example 4: Tech News Aggregation and Analysis")
        print("-" * 40)
        news_workflow = create_news_aggregation_workflow()
        created_workflows.append(news_workflow)

        news_result = news_workflow.run(
            """For each news source:
    1. TechCrunch: Extract the top 5 headlines about AI or machine learning
    2. HackerNews: Extract the top 5 posts related to AI/ML with most points
    3. Reddit r/technology: Extract top 5 posts about AI from the past week
    Analyze sentiment and identify emerging trends in AI technology."""
        )
        print(f"News Analysis Result:\n{news_result}")
    finally:
        # Cleanup all browser instances from every workflow created above,
        # even if one of the examples raised.
        print("\n" + "=" * 70)
        print("Cleaning up browser instances...")
        for workflow in created_workflows:
            _cleanup_workflow_agents(workflow)
        print("All workflows completed!")
        print("=" * 70)

@ -0,0 +1,249 @@
# Stagehand Browser Automation Integration for Swarms
This directory contains examples demonstrating how to integrate [Stagehand](https://github.com/browserbase/stagehand), an AI-powered browser automation framework, with the Swarms multi-agent framework.
## Overview
Stagehand provides natural language browser automation capabilities that can be seamlessly integrated into Swarms agents. This integration enables:
- 🌐 **Natural Language Web Automation**: Use simple commands like "click the submit button" or "extract product prices"
- 🤖 **Multi-Agent Browser Workflows**: Multiple agents can automate different websites simultaneously
- 🔧 **Flexible Integration Options**: Use as a wrapped agent, individual tools, or via MCP server
- 📊 **Complex Automation Scenarios**: E-commerce monitoring, competitive analysis, automated testing, and more
## Examples
### 1. Stagehand Wrapper Agent (`1_stagehand_wrapper_agent.py`)
The simplest integration - wraps Stagehand as a Swarms-compatible agent.
```python
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
# Create a browser automation agent
browser_agent = StagehandAgent(
agent_name="WebScraperAgent",
model_name="gpt-4o-mini",
env="LOCAL", # or "BROWSERBASE" for cloud execution
)
# Use natural language to control the browser
result = browser_agent.run(
"Navigate to news.ycombinator.com and extract the top 5 story titles"
)
```
**Features:**
- Inherits from Swarms `Agent` base class
- Automatic browser lifecycle management
- Natural language task interpretation
- Support for both local (Playwright) and cloud (Browserbase) execution
### 2. Stagehand as Tools (`2_stagehand_tools_agent.py`)
Provides fine-grained control by exposing Stagehand methods as individual tools.
```python
from swarms import Agent
from examples.stagehand.stagehand_tools_agent import (
NavigateTool, ActTool, ExtractTool, ObserveTool, ScreenshotTool
)
# Create agent with browser tools
browser_agent = Agent(
agent_name="BrowserAutomationAgent",
model_name="gpt-4o-mini",
tools=[
NavigateTool(),
ActTool(),
ExtractTool(),
ObserveTool(),
ScreenshotTool(),
],
)
# Agent can now use tools strategically
result = browser_agent.run(
"Go to google.com, search for 'Python tutorials', and extract the first 3 results"
)
```
**Available Tools:**
- `NavigateTool`: Navigate to URLs
- `ActTool`: Perform actions (click, type, scroll)
- `ExtractTool`: Extract data from pages
- `ObserveTool`: Find elements on pages
- `ScreenshotTool`: Capture screenshots
- `CloseBrowserTool`: Clean up browser resources
### 3. Stagehand MCP Server (`3_stagehand_mcp_agent.py`)
Integrates with Stagehand's Model Context Protocol (MCP) server for standardized tool access.
```python
from examples.stagehand.stagehand_mcp_agent import StagehandMCPAgent
# Connect to Stagehand MCP server
mcp_agent = StagehandMCPAgent(
agent_name="WebResearchAgent",
mcp_server_url="http://localhost:3000/sse",
)
# Use MCP tools including multi-session management
result = mcp_agent.run("""
Create 3 browser sessions and:
1. Session 1: Check Python.org for latest version
2. Session 2: Check PyPI for trending packages
3. Session 3: Check GitHub Python trending repos
Compile a Python ecosystem status report.
""")
```
**MCP Features:**
- Automatic tool discovery
- Multi-session browser management
- Built-in screenshot resources
- Prompt templates for common tasks
### 4. Multi-Agent Workflows (`4_stagehand_multi_agent_workflow.py`)
Demonstrates complex multi-agent browser automation scenarios.
```python
from examples.stagehand.stagehand_multi_agent_workflow import (
create_price_comparison_workflow,
create_competitive_analysis_workflow,
create_automated_testing_workflow,
create_news_aggregation_workflow
)
# Price comparison across multiple e-commerce sites
price_workflow = create_price_comparison_workflow()
result = price_workflow.run(
"Compare prices for iPhone 15 Pro on Amazon and eBay"
)
# Competitive analysis of multiple companies
competitive_workflow = create_competitive_analysis_workflow()
result = competitive_workflow.run(
"Analyze OpenAI, Anthropic, and DeepMind websites and social media"
)
```
**Workflow Examples:**
- **E-commerce Monitoring**: Track prices across multiple sites
- **Competitive Analysis**: Research competitors' websites and social media
- **Automated Testing**: UI, form validation, and accessibility testing
- **News Aggregation**: Collect and analyze news from multiple sources
## Setup
### Prerequisites
1. **Install Swarms and Stagehand:**
```bash
pip install swarms stagehand
```
2. **Set up environment variables:**
```bash
# For local browser automation (using Playwright)
export OPENAI_API_KEY="your-openai-key"
# For cloud browser automation (using Browserbase)
export BROWSERBASE_API_KEY="your-browserbase-key"
export BROWSERBASE_PROJECT_ID="your-project-id"
```
3. **For MCP Server examples:**
```bash
# Install and run the Stagehand MCP server
cd stagehand-mcp-server
npm install
npm run build
npm start
```
## Use Cases
### E-commerce Automation
- Price monitoring and comparison
- Inventory tracking
- Automated purchasing workflows
- Review aggregation
### Research and Analysis
- Competitive intelligence gathering
- Market research automation
- Social media monitoring
- News and trend analysis
### Quality Assurance
- Automated UI testing
- Cross-browser compatibility testing
- Form validation testing
- Accessibility compliance checking
### Data Collection
- Web scraping at scale
- Real-time data monitoring
- Structured data extraction
- Screenshot documentation
## Best Practices
1. **Resource Management**: Always clean up browser instances when done
```python
browser_agent.cleanup() # For wrapper agents
```
2. **Error Handling**: Stagehand includes self-healing capabilities, but wrap critical operations in try-except blocks
3. **Parallel Execution**: Use `ConcurrentWorkflow` for simultaneous browser automation across multiple sites
4. **Session Management**: For complex multi-page workflows, use the MCP server's session management capabilities
5. **Rate Limiting**: Be respectful of websites - add delays between requests when necessary
## Testing
Run the test suite to verify the integration:
```bash
pytest tests/stagehand/test_stagehand_integration.py -v
```
## Troubleshooting
### Common Issues
1. **Browser not starting**: Ensure Playwright is properly installed
```bash
playwright install
```
2. **MCP connection failed**: Verify the MCP server is running on the correct port
3. **Timeout errors**: Increase timeout in StagehandConfig or agent initialization
### Debug Mode
Enable verbose logging:
```python
agent = StagehandAgent(
agent_name="DebugAgent",
verbose=True, # Enable detailed logging
)
```
## Contributing
We welcome contributions! Please:
1. Follow the existing code style
2. Add tests for new features
3. Update documentation
4. Submit PRs with clear descriptions
## License
These examples are provided under the same license as the Swarms framework. Stagehand is licensed separately - see [Stagehand's repository](https://github.com/browserbase/stagehand) for details.

@ -0,0 +1,13 @@
# Requirements for Stagehand integration examples
swarms>=8.0.0
stagehand>=0.1.0
python-dotenv>=1.0.0
pydantic>=2.0.0
loguru>=0.7.0
# For MCP server examples (optional)
httpx>=0.24.0
# For testing
pytest>=7.0.0
pytest-asyncio>=0.21.0

@ -0,0 +1,436 @@
"""
Tests for Stagehand Integration with Swarms
==========================================
This module contains tests for the Stagehand browser automation
integration with the Swarms framework.
"""
import json
import pytest
from unittest.mock import AsyncMock, patch
# Mock Stagehand classes
class MockObserveResult:
    """Stand-in for one Stagehand ``observe()`` result.

    Exposes the three attributes the tests read: ``description``,
    ``selector``, and ``method`` (defaulting to "click").
    """

    def __init__(self, description, selector, method="click"):
        self.description, self.selector, self.method = (
            description,
            selector,
            method,
        )
class MockStagehandPage:
    """Async stub mimicking the subset of the Stagehand page API the tests use."""

    async def goto(self, url):
        # Navigation is a no-op in the mock.
        return None

    async def act(self, action):
        # Echo the requested action so callers can assert on it.
        return f"Performed action: {action}"

    async def extract(self, query):
        # Fixed payload that records the query it was given.
        payload = {"extracted": query, "data": ["item1", "item2"]}
        return payload

    async def observe(self, query):
        # Two canned matches, regardless of the query.
        matches = [
            MockObserveResult("Search box", "#search-input"),
            MockObserveResult("Submit button", "#submit-btn"),
        ]
        return matches
class MockStagehand:
    """Async stub for the Stagehand client.

    Records the config it was built with and exposes a mock page; ``init``
    and ``close`` are no-ops.
    """

    def __init__(self, config):
        # Keep the config for later assertions and attach a fake page.
        self.config = config
        self.page = MockStagehandPage()

    async def init(self):
        # Startup is a no-op for the mock.
        pass

    async def close(self):
        # Shutdown is a no-op for the mock.
        pass
# Test StagehandAgent wrapper
class TestStagehandAgent:
    """Test the StagehandAgent wrapper class.

    Every test patches ``Stagehand`` with :class:`MockStagehand` so that no
    real browser is launched; imports of the wrapper happen inside each test
    so patching is already in effect when the class is used.
    """

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_agent_initialization(self):
        """Test that StagehandAgent initializes correctly."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Constructor arguments must be forwarded into the stored config,
        # and the browser must stay lazy (not initialized until first run).
        assert agent.agent_name == "TestAgent"
        assert agent.stagehand_config.env == "LOCAL"
        assert agent.stagehand_config.model_name == "gpt-4o-mini"
        assert not agent._initialized

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_navigation_task(self):
        """Test navigation and extraction task."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run(
            "Navigate to example.com and extract the main content"
        )

        # Parse result — run() is expected to return a JSON string whose
        # "data" records the visited URL and any extracted content.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert "navigated_to" in result_data["data"]
        assert (
            result_data["data"]["navigated_to"]
            == "https://example.com"
        )
        assert "extracted" in result_data["data"]

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_search_task(self):
        """Test search functionality."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run(
            "Go to google.com and search for 'test query'"
        )

        # The wrapper should recognize a search task and report the query.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert result_data["data"]["search_query"] == "test query"
        assert result_data["action"] == "search"

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_cleanup(self):
        """Test that cleanup properly closes browser."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Initialize the agent by running a first task.
        agent.run("Navigate to example.com")
        assert agent._initialized

        # Cleanup
        agent.cleanup()

        # After cleanup, the agent should re-initialize and run again.
        result = agent.run("Navigate to example.com")
        assert result is not None
# Test Stagehand Tools
class TestStagehandTools:
    """Test individual Stagehand tools.

    BUGFIX: every test here is a coroutine, so each one is now marked with
    ``@pytest.mark.asyncio`` — without the marker, pytest-asyncio (in its
    default strict mode) does not execute ``async def`` tests, so they were
    silently not awaited. This also makes the class consistent with
    ``TestIntegration``, whose async test already carries the marker.
    """

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_navigate_tool(self, mock_browser_state):
        """Test NavigateTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            NavigateTool,
        )

        # Setup mock: browser_state hands back an AsyncMock page.
        mock_page = AsyncMock()
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = NavigateTool()
        result = await tool._async_run("https://example.com")

        assert (
            "Successfully navigated to https://example.com" in result
        )
        mock_page.goto.assert_called_once_with("https://example.com")

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_act_tool(self, mock_browser_state):
        """Test ActTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ActTool

        # Setup mock: page.act returns a fixed confirmation string.
        mock_page = AsyncMock()
        mock_page.act = AsyncMock(return_value="Action completed")
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ActTool()
        result = await tool._async_run("click the button")

        # The tool should report the action and forward it verbatim.
        assert "Action performed" in result
        assert "click the button" in result
        mock_page.act.assert_called_once_with("click the button")

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_extract_tool(self, mock_browser_state):
        """Test ExtractTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            ExtractTool,
        )

        # Setup mock: page.extract returns a plain dict.
        mock_page = AsyncMock()
        mock_page.extract = AsyncMock(
            return_value={
                "title": "Test Page",
                "content": "Test content",
            }
        )
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ExtractTool()
        result = await tool._async_run("extract the page title")

        # Result should be a JSON string of the extracted dict.
        parsed_result = json.loads(result)
        assert parsed_result["title"] == "Test Page"
        assert parsed_result["content"] == "Test content"

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_observe_tool(self, mock_browser_state):
        """Test ObserveTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            ObserveTool,
        )

        # Setup mock: page.observe returns two canned observations.
        mock_page = AsyncMock()
        mock_observations = [
            MockObserveResult("Search input", "#search"),
            MockObserveResult("Submit button", "#submit"),
        ]
        mock_page.observe = AsyncMock(return_value=mock_observations)
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ObserveTool()
        result = await tool._async_run("find the search box")

        # Result should be a JSON string listing both observations.
        parsed_result = json.loads(result)
        assert len(parsed_result) == 2
        assert parsed_result[0]["description"] == "Search input"
        assert parsed_result[0]["selector"] == "#search"
# Test MCP integration
class TestStagehandMCP:
    """Test Stagehand MCP server integration."""

    def test_mcp_agent_initialization(self):
        """Test that MCP agent initializes with correct parameters."""
        from examples.stagehand.stagehand_mcp_agent import (
            StagehandMCPAgent,
        )

        mcp_agent = StagehandMCPAgent(
            agent_name="TestMCPAgent",
            mcp_server_url="http://localhost:3000/sse",
            model_name="gpt-4o-mini",
        )

        # The wrapper should forward its settings to the inner swarms Agent.
        inner = mcp_agent.agent
        assert inner.agent_name == "TestMCPAgent"
        assert inner.mcp_url == "http://localhost:3000/sse"
        assert inner.model_name == "gpt-4o-mini"

    def test_multi_session_swarm_creation(self):
        """Test multi-session browser swarm creation."""
        from examples.stagehand.stagehand_mcp_agent import (
            MultiSessionBrowserSwarm,
        )

        swarm = MultiSessionBrowserSwarm(
            mcp_server_url="http://localhost:3000/sse",
            num_agents=3,
        )

        assert len(swarm.agents) == 3
        # Agents are named by role with a numeric suffix.
        for idx, expected_name in enumerate(
            ["DataExtractor_0", "FormFiller_1", "WebMonitor_2"]
        ):
            assert swarm.agents[idx].agent_name == expected_name

    @patch("swarms.Agent.run")
    def test_task_distribution(self, mock_run):
        """Test task distribution among swarm agents."""
        from examples.stagehand.stagehand_mcp_agent import (
            MultiSessionBrowserSwarm,
        )

        mock_run.return_value = "Task completed"
        swarm = MultiSessionBrowserSwarm(num_agents=2)

        # Three tasks across two agents: each task must run exactly once.
        results = swarm.distribute_tasks(["Task 1", "Task 2", "Task 3"])

        assert len(results) == 3
        assert all(result == "Task completed" for result in results)
        assert mock_run.call_count == 3
# Test multi-agent workflows
class TestMultiAgentWorkflows:
    """Test multi-agent workflow configurations.

    Each test patches ``Stagehand`` with :class:`MockStagehand` so that
    building the workflows never launches a real browser; only the wiring
    (agent counts, nesting, names, flow strings) is inspected.
    """

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_price_comparison_workflow_creation(self):
        """Test creation of price comparison workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_price_comparison_workflow,
        )

        workflow = create_price_comparison_workflow()

        # Should be a SequentialWorkflow with 2 agents
        assert len(workflow.agents) == 2
        # First agent should be a ConcurrentWorkflow
        assert hasattr(workflow.agents[0], "agents")
        # Second agent should be the analysis agent
        assert workflow.agents[1].agent_name == "PriceAnalysisAgent"

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_competitive_analysis_workflow_creation(self):
        """Test creation of competitive analysis workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_competitive_analysis_workflow,
        )

        workflow = create_competitive_analysis_workflow()

        # Should have 3 agents in the rearrange pattern, and the flow
        # string must match the pattern declared in the workflow factory.
        assert len(workflow.agents) == 3
        assert (
            workflow.flow
            == "company_researcher -> social_media_agent -> report_compiler"
        )

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_automated_testing_workflow_creation(self):
        """Test creation of automated testing workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_automated_testing_workflow,
        )

        workflow = create_automated_testing_workflow()

        # Should be a SequentialWorkflow: concurrent testers, then reporter.
        assert len(workflow.agents) == 2
        # First should be concurrent testing
        assert hasattr(workflow.agents[0], "agents")
        assert (
            len(workflow.agents[0].agents) == 3
        )  # UI, Form, Accessibility testers

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_news_aggregation_workflow_creation(self):
        """Test creation of news aggregation workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_news_aggregation_workflow,
        )

        workflow = create_news_aggregation_workflow()

        # Should be a SequentialWorkflow with 3 stages
        assert len(workflow.agents) == 3
        # First stage should be concurrent scrapers
        assert hasattr(workflow.agents[0], "agents")
        assert len(workflow.agents[0].agents) == 3  # 3 news sources
# Integration tests
class TestIntegration:
    """End-to-end integration tests."""

    @pytest.mark.asyncio
    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    async def test_full_browser_automation_flow(self):
        """Test a complete browser automation flow."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        browser_agent = StagehandAgent(
            agent_name="IntegrationTestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Exercise navigate -> extract -> observe in sequence on one agent,
        # checking that each response carries the expected marker key.
        for task, marker in (
            ("Navigate to example.com", "navigated_to"),
            ("Extract all text from the page", "extracted"),
            ("Find all buttons on the page", "observation"),
        ):
            assert marker in browser_agent.run(task)

        # Release the (mocked) browser resources.
        browser_agent.cleanup()
# Allow running this test module directly: `python <this file>` invokes
# pytest on it with verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])

@ -0,0 +1,302 @@
"""
Simple tests for Stagehand Integration with Swarms
=================================================
These tests verify the basic structure and functionality of the
Stagehand integration without requiring external dependencies.
"""
import json
import pytest
from unittest.mock import MagicMock
class TestStagehandIntegrationStructure:
    """Test that integration files have correct structure."""

    @staticmethod
    def _read_example(filename):
        # Helper: load one example file's source text from the examples dir.
        with open(f"examples/stagehand/{filename}", "r") as fh:
            return fh.read()

    def test_examples_directory_exists(self):
        """Test that examples directory structure is correct."""
        import os

        base_path = "examples/stagehand"
        assert os.path.exists(base_path)

        # Every shipped example, plus docs and requirements, must exist.
        for expected in (
            "1_stagehand_wrapper_agent.py",
            "2_stagehand_tools_agent.py",
            "3_stagehand_mcp_agent.py",
            "4_stagehand_multi_agent_workflow.py",
            "README.md",
            "requirements.txt",
        ):
            assert os.path.exists(
                os.path.join(base_path, expected)
            ), f"Missing file: {expected}"

    def test_wrapper_agent_imports(self):
        """Test that wrapper agent has correct imports."""
        source = self._read_example("1_stagehand_wrapper_agent.py")

        # Check for required imports and the wrapper class itself.
        for needle in (
            "from swarms import Agent",
            "import asyncio",
            "import json",
            "class StagehandAgent",
        ):
            assert needle in source

    def test_tools_agent_imports(self):
        """Test that tools agent has correct imports."""
        source = self._read_example("2_stagehand_tools_agent.py")

        # Check for required imports and the function-based tools.
        for needle in (
            "from swarms import Agent",
            "def navigate_browser",
            "def browser_act",
            "def browser_extract",
        ):
            assert needle in source

    def test_mcp_agent_imports(self):
        """Test that MCP agent has correct imports."""
        source = self._read_example("3_stagehand_mcp_agent.py")

        # Check for required imports and the MCP wrapper class.
        for needle in (
            "from swarms import Agent",
            "class StagehandMCPAgent",
            "mcp_url",
        ):
            assert needle in source

    def test_workflow_agent_imports(self):
        """Test that workflow agent has correct imports."""
        source = self._read_example(
            "4_stagehand_multi_agent_workflow.py"
        )

        # Check for required imports (workflow types and AgentRearrange).
        for needle in (
            "from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow",
            "from swarms.structs.agent_rearrange import AgentRearrange",
        ):
            assert needle in source
class TestStagehandMockIntegration:
    """Test Stagehand integration with mocked dependencies."""

    def test_mock_stagehand_initialization(self):
        """Test that Stagehand can be mocked and initialized."""
        # Build the mock hierarchy without importing the real stagehand
        # package: a factory that yields a client exposing init().
        stagehand_factory = MagicMock()
        fake_client = MagicMock()
        fake_client.init = MagicMock()
        stagehand_factory.return_value = fake_client

        fake_config = MagicMock()
        client = stagehand_factory(fake_config)

        # Verify the mock behaves like a constructible client.
        assert client is not None
        assert hasattr(client, "init")

    def test_json_serialization(self):
        """Test JSON serialization for agent responses."""
        # Shape of a typical browser-automation agent response.
        test_data = {
            "task": "Navigate to example.com",
            "status": "completed",
            "data": {
                "navigated_to": "https://example.com",
                "extracted": ["item1", "item2"],
                "action": "navigate",
            },
        }

        # Round-trip through JSON and verify the payload survives intact.
        serialized = json.dumps(test_data, indent=2)
        assert isinstance(serialized, str)

        round_tripped = json.loads(serialized)
        assert round_tripped["task"] == "Navigate to example.com"
        assert round_tripped["status"] == "completed"
        assert len(round_tripped["data"]["extracted"]) == 2

    def test_url_extraction_logic(self):
        """Test URL extraction logic from task strings."""
        import re

        url_pattern = r"https?://[^\s]+"
        domain_pattern = r"(\w+\.\w+)"

        cases = [
            (
                "Navigate to https://example.com",
                ["https://example.com"],
            ),
            ("Go to google.com and search", ["google.com"]),
            (
                "Visit https://github.com/repo",
                ["https://github.com/repo"],
            ),
            ("Open example.org", ["example.org"]),
        ]

        for task, expected in cases:
            # Prefer fully-qualified URLs; fall back to bare domains.
            found = re.findall(url_pattern, task) or re.findall(
                domain_pattern, task
            )
            assert found, f"Failed to extract URL from: {task}"
            assert (
                found[0] in expected
            ), f"Expected {expected}, got {found}"
class TestSwarmsPatternsCompliance:
    """Test compliance with Swarms framework patterns."""

    @staticmethod
    def _source_of(filename):
        # Helper: read one example file's source text.
        with open(f"examples/stagehand/{filename}", "r") as fh:
            return fh.read()

    def test_agent_inheritance_pattern(self):
        """Test that wrapper agent follows Swarms Agent inheritance pattern."""
        source = self._source_of("1_stagehand_wrapper_agent.py")

        # The wrapper must subclass SwarmsAgent and implement run().
        for needle in (
            "class StagehandAgent(SwarmsAgent):",
            "def run(self, task: str",
            "return",
        ):
            assert needle in source

    def test_tools_pattern(self):
        """Test that tools follow Swarms function-based pattern."""
        source = self._source_of("2_stagehand_tools_agent.py")

        # Tools are plain functions with typed string signatures.
        for needle in (
            "def navigate_browser(url: str) -> str:",
            "def browser_act(action: str) -> str:",
            "def browser_extract(query: str) -> str:",
            "def browser_observe(query: str) -> str:",
        ):
            assert needle in source

    def test_mcp_integration_pattern(self):
        """Test MCP integration follows Swarms pattern."""
        source = self._source_of("3_stagehand_mcp_agent.py")

        # The agent must be wired to an MCP server URL.
        for needle in ("mcp_url=", "Agent("):
            assert needle in source

    def test_workflow_patterns(self):
        """Test workflow patterns are properly used."""
        source = self._source_of("4_stagehand_multi_agent_workflow.py")

        # All three orchestration structures must appear in the example.
        for needle in (
            "SequentialWorkflow",
            "ConcurrentWorkflow",
            "AgentRearrange",
        ):
            assert needle in source
class TestDocumentationAndExamples:
    """Test documentation and example completeness."""

    def test_readme_completeness(self):
        """Test that README contains essential information."""
        with open("examples/stagehand/README.md", "r") as fh:
            readme = fh.read()

        # The README must carry all the standard sections.
        for section in (
            "# Stagehand Browser Automation Integration",
            "## Overview",
            "## Examples",
            "## Setup",
            "## Use Cases",
            "## Best Practices",
        ):
            assert section in readme, f"Missing section: {section}"

    def test_requirements_file(self):
        """Test that requirements file has necessary dependencies."""
        with open("examples/stagehand/requirements.txt", "r") as fh:
            requirements = fh.read()

        for dep in (
            "swarms",
            "stagehand",
            "python-dotenv",
            "pydantic",
            "loguru",
        ):
            assert dep in requirements, f"Missing dependency: {dep}"

    def test_example_files_have_docstrings(self):
        """Test that example files have proper docstrings."""
        for file_path in (
            "examples/stagehand/1_stagehand_wrapper_agent.py",
            "examples/stagehand/2_stagehand_tools_agent.py",
            "examples/stagehand/3_stagehand_mcp_agent.py",
            "examples/stagehand/4_stagehand_multi_agent_workflow.py",
        ):
            with open(file_path, "r") as fh:
                source = fh.read()

            # Module docstring near the top and a runnable main block.
            assert (
                '"""' in source[:500]
            ), f"Missing docstring in {file_path}"
            assert (
                'if __name__ == "__main__":' in source
            ), f"Missing main block in {file_path}"
# Allow running this test module directly: `python <this file>` invokes
# pytest on it with verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
Loading…
Cancel
Save