parent
daf0891611
commit
b04e60ca17
@ -0,0 +1,257 @@
|
|||||||
|
"""
|
||||||
|
Stagehand Browser Automation Agent for Swarms
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
This example demonstrates how to create a Swarms-compatible agent
|
||||||
|
that wraps Stagehand's browser automation capabilities.
|
||||||
|
|
||||||
|
The StagehandAgent class inherits from the Swarms Agent base class
|
||||||
|
and implements browser automation through natural language commands.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from swarms import Agent as SwarmsAgent
|
||||||
|
from stagehand import Stagehand, StagehandConfig
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class WebData(BaseModel):
    """Schema for extracted web data.

    Canonical record shape for content pulled out of a page by the
    browser agent; ``metadata`` is a free-form bag for anything extra.
    """

    # Fields marked ``...`` are required by pydantic.
    url: str = Field(..., description="The URL of the page")
    title: str = Field(..., description="Page title")
    content: str = Field(..., description="Extracted content")
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class StagehandAgent(SwarmsAgent):
    """
    A Swarms agent that integrates Stagehand for browser automation.

    This agent can navigate websites, extract data, perform actions,
    and observe page elements using natural language instructions.
    """

    def __init__(
        self,
        agent_name: str = "StagehandBrowserAgent",
        browserbase_api_key: Optional[str] = None,
        browserbase_project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
        env: str = "LOCAL",  # LOCAL or BROWSERBASE
        *args,
        **kwargs,
    ):
        """
        Initialize the StagehandAgent.

        Args:
            agent_name: Name of the agent.
            browserbase_api_key: API key for Browserbase (if using cloud).
            browserbase_project_id: Project ID for Browserbase.
            model_name: LLM model to use.
            model_api_key: API key for the model.
            env: Environment - LOCAL or BROWSERBASE.
        """
        # Stagehand-specific arguments are consumed here; everything else
        # is forwarded untouched to the Swarms Agent base class.
        super().__init__(agent_name=agent_name, *args, **kwargs)

        self.stagehand_config = StagehandConfig(
            env=env,
            api_key=browserbase_api_key
            or os.getenv("BROWSERBASE_API_KEY"),
            project_id=browserbase_project_id
            or os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name=model_name,
            model_api_key=model_api_key or os.getenv("OPENAI_API_KEY"),
        )
        # Lazily created on first run(); see _init_stagehand().
        self.stagehand = None
        self._initialized = False

    async def _init_stagehand(self):
        """Initialize the Stagehand instance on first use (idempotent)."""
        if not self._initialized:
            self.stagehand = Stagehand(self.stagehand_config)
            await self.stagehand.init()
            self._initialized = True
            logger.info(f"Stagehand initialized for {self.agent_name}")

    async def _close_stagehand(self):
        """Close the Stagehand instance if it is open (idempotent)."""
        if self.stagehand and self._initialized:
            await self.stagehand.close()
            self._initialized = False
            logger.info(f"Stagehand closed for {self.agent_name}")

    def run(self, task: str, *args, **kwargs) -> str:
        """
        Execute a browser automation task.

        The task string should contain instructions like:
        - "Navigate to example.com and extract the main content"
        - "Go to google.com and search for 'AI agents'"
        - "Extract all company names from https://ycombinator.com"

        Args:
            task: Natural language description of the browser task.

        Returns:
            String result of the task execution (JSON on success, an
            error message on failure).
        """
        return asyncio.run(self._async_run(task, *args, **kwargs))

    async def _async_run(
        self, task: str, *args, **kwargs
    ) -> str:
        """Async implementation of run(); never raises, returns a string."""
        try:
            await self._init_stagehand()
            result = await self._execute_browser_task(task)
            # default=str keeps json.dumps from raising on non-JSON-native
            # objects that page.extract() may return.
            return json.dumps(result, indent=2, default=str)
        except Exception as e:
            logger.error(f"Error in browser task: {str(e)}")
            return f"Error executing browser task: {str(e)}"
        # NOTE: the browser is intentionally kept open for follow-up
        # tasks; call cleanup() explicitly when finished.

    async def _execute_browser_task(
        self, task: str
    ) -> Dict[str, Any]:
        """
        Execute a browser task based on natural language instructions.

        Simple keyword heuristics decide which Stagehand primitive
        (goto / extract / act / observe) is invoked.

        Args:
            task: Natural language description of the browser task.

        Returns:
            Dict with keys ``task``, ``status``, ``data`` and (usually)
            ``action``.
        """
        import re

        page = self.stagehand.page
        result: Dict[str, Any] = {
            "task": task,
            "status": "completed",
            "data": {},
        }

        # --- Navigation: runs before (and in addition to) any action ---
        if any(
            keyword in task.lower()
            for keyword in ["navigate", "go to", "visit", "open"]
        ):
            # Prefer explicit URLs; otherwise promote bare domains
            # ("example.com") to https URLs.
            urls = re.findall(r"https?://[^\s]+", task)
            if not urls and any(
                domain in task for domain in [".com", ".org", ".net"]
            ):
                domains = re.findall(r"(\w+\.\w+)", task)
                if domains:
                    urls = [f"https://{domain}" for domain in domains]

            if urls:
                url = urls[0]
                await page.goto(url)
                result["data"]["navigated_to"] = url
                logger.info(f"Navigated to {url}")

        # --- Action dispatch by keyword --------------------------------
        if "extract" in task.lower():
            # Case-insensitive removal: the guard above lowercases the
            # task, so a case-sensitive replace would miss "Extract ...".
            extraction_prompt = re.sub(
                "extract", "", task, flags=re.IGNORECASE
            ).strip()
            extracted = await page.extract(extraction_prompt)
            result["data"]["extracted"] = extracted
            result["action"] = "extract"

        elif "click" in task.lower() or "press" in task.lower():
            action_result = await page.act(task)
            result["data"]["action_performed"] = str(action_result)
            result["action"] = "act"

        elif "search" in task.lower():
            search_query = (
                task.split("search for")[-1].strip().strip("'\"")
            )
            # First locate the search box, then click, type, and submit.
            search_box = await page.observe("find the search input field")
            if search_box:
                # Use the observed element's human-readable description,
                # not str() of the ObserveResult object itself.
                await page.act(f"click on {search_box[0].description}")
                await page.act(f"type '{search_query}'")
                await page.act("press Enter")
            result["data"]["search_query"] = search_query
            result["action"] = "search"

        elif "observe" in task.lower() or "find" in task.lower():
            observation = await page.observe(task)
            result["data"]["observation"] = [
                {"description": obs.description, "selector": obs.selector}
                for obs in observation
            ]
            result["action"] = "observe"

        else:
            # No recognized keyword: hand the whole task to act().
            action_result = await page.act(task)
            result["data"]["action_result"] = str(action_result)
            result["action"] = "general"

        return result

    def cleanup(self):
        """Clean up browser resources (safe to call multiple times)."""
        if self._initialized:
            asyncio.run(self._close_stagehand())

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            # Never raise from __del__ — e.g. asyncio.run() may fail
            # during interpreter shutdown.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    # Instantiate the browser agent (LOCAL = Playwright,
    # BROWSERBASE = cloud browsers).
    browser_agent = StagehandAgent(
        agent_name="WebScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    separator = "\n" + "=" * 50 + "\n"

    # Example 1: Navigate and extract data
    print("Example 1: Basic navigation and extraction")
    print(
        browser_agent.run(
            "Navigate to https://news.ycombinator.com and extract the titles of the top 5 stories"
        )
    )
    print(separator)

    # Example 2: Perform a search
    print("Example 2: Search on a website")
    print(
        browser_agent.run(
            "Go to google.com and search for 'Swarms AI framework'"
        )
    )
    print(separator)

    # Example 3: Extract structured data
    print("Example 3: Extract specific information")
    print(
        browser_agent.run(
            "Navigate to https://example.com and extract the main heading and first paragraph"
        )
    )

    # Clean up
    browser_agent.cleanup()
|
@ -0,0 +1,332 @@
|
|||||||
|
"""
|
||||||
|
Stagehand Tools for Swarms Agent
|
||||||
|
=================================
|
||||||
|
|
||||||
|
This example demonstrates how to create Stagehand browser automation tools
|
||||||
|
that can be used by a standard Swarms Agent. Each Stagehand method (act,
|
||||||
|
extract, observe) becomes a separate tool that the agent can use.
|
||||||
|
|
||||||
|
This approach gives the agent more fine-grained control over browser
|
||||||
|
automation tasks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from swarms import Agent
|
||||||
|
from swarms.tools.base_tool import BaseTool
|
||||||
|
from stagehand import Stagehand, StagehandConfig
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserState:
    """Singleton to manage browser state across tools."""

    _instance = None
    _stagehand = None
    _initialized = False

    def __new__(cls):
        # Classic singleton: every instantiation returns the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def init_browser(
        self,
        env: str = "LOCAL",
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
    ):
        """Initialize the browser if not already initialized."""
        if self._initialized:
            return

        self._stagehand = Stagehand(
            StagehandConfig(
                env=env,
                api_key=api_key or os.getenv("BROWSERBASE_API_KEY"),
                project_id=project_id or os.getenv("BROWSERBASE_PROJECT_ID"),
                model_name=model_name,
                model_api_key=model_api_key or os.getenv("OPENAI_API_KEY"),
            )
        )
        await self._stagehand.init()
        self._initialized = True
        logger.info("Stagehand browser initialized")

    async def get_page(self):
        """Get the current page instance."""
        if not self._initialized:
            raise RuntimeError("Browser not initialized. Call init_browser first.")
        return self._stagehand.page

    async def close(self):
        """Close the browser."""
        if not (self._initialized and self._stagehand):
            return
        await self._stagehand.close()
        self._initialized = False
        logger.info("Stagehand browser closed")


# Module-wide shared browser state used by every tool below.
browser_state = BrowserState()
|
||||||
|
|
||||||
|
|
||||||
|
class NavigateTool(BaseTool):
    """Tool for navigating to URLs in the browser."""

    def __init__(self):
        super().__init__(
            name="navigate_browser",
            description="Navigate to a URL in the browser. Input should be a valid URL starting with http:// or https://",
            verbose=True,
        )

    def run(self, url: str) -> str:
        """Navigate to the specified URL (sync wrapper)."""
        return asyncio.run(self._async_run(url))

    async def _async_run(self, url: str) -> str:
        """Open the URL in the shared browser session."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            # Default to HTTPS when the caller omitted the scheme.
            if not url.startswith(("http://", "https://")):
                url = f"https://{url}"

            await page.goto(url)
        except Exception as e:
            logger.error(f"Navigation error: {str(e)}")
            return f"Failed to navigate to {url}: {str(e)}"
        return f"Successfully navigated to {url}"
|
||||||
|
|
||||||
|
|
||||||
|
class ActTool(BaseTool):
    """Tool for performing actions on web pages."""

    def __init__(self):
        super().__init__(
            name="browser_act",
            description=(
                "Perform an action on the current web page using natural language. "
                "Examples: 'click the submit button', 'type hello@example.com in the email field', "
                "'scroll down', 'press Enter'"
            ),
            verbose=True,
        )

    def run(self, action: str) -> str:
        """Perform the specified action (sync wrapper)."""
        return asyncio.run(self._async_run(action))

    async def _async_run(self, action: str) -> str:
        """Hand the natural-language action to Stagehand's act()."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()
            outcome = await page.act(action)
        except Exception as e:
            logger.error(f"Action error: {str(e)}")
            return f"Failed to perform action '{action}': {str(e)}"
        return f"Action performed: {action}. Result: {outcome}"
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractTool(BaseTool):
    """Tool for extracting data from web pages."""

    def __init__(self):
        super().__init__(
            name="browser_extract",
            description=(
                "Extract information from the current web page using natural language. "
                "Examples: 'extract all email addresses', 'get the main article text', "
                "'find all product prices', 'extract the page title and meta description'"
            ),
            verbose=True,
        )

    def run(self, query: str) -> str:
        """Extract information based on the query (sync wrapper)."""
        return asyncio.run(self._async_run(query))

    async def _async_run(self, query: str) -> str:
        """Run Stagehand extract() and serialize the result for the agent."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            extracted = await page.extract(query)

            # Dicts/lists are rendered as JSON; anything else is stringified.
            return (
                json.dumps(extracted, indent=2)
                if isinstance(extracted, (dict, list))
                else str(extracted)
            )
        except Exception as e:
            logger.error(f"Extraction error: {str(e)}")
            return f"Failed to extract '{query}': {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
class ObserveTool(BaseTool):
    """Tool for observing elements on web pages."""

    def __init__(self):
        super().__init__(
            name="browser_observe",
            description=(
                "Observe and find elements on the current web page using natural language. "
                "Returns information about elements including their selectors. "
                "Examples: 'find the search box', 'locate the submit button', "
                "'find all navigation links'"
            ),
            verbose=True,
        )

    def run(self, query: str) -> str:
        """Observe elements based on the query (sync wrapper)."""
        return asyncio.run(self._async_run(query))

    async def _async_run(self, query: str) -> str:
        """Run Stagehand observe() and return the findings as JSON."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            observations = await page.observe(query)

            # Flatten each ObserveResult into a plain JSON-friendly dict.
            findings = [
                {
                    "description": obs.description,
                    "selector": obs.selector,
                    "method": obs.method,
                }
                for obs in observations
            ]
            return json.dumps(findings, indent=2)
        except Exception as e:
            logger.error(f"Observation error: {str(e)}")
            return f"Failed to observe '{query}': {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
class ScreenshotTool(BaseTool):
    """Tool for taking screenshots of the current page."""

    def __init__(self):
        super().__init__(
            name="browser_screenshot",
            description="Take a screenshot of the current web page. Optionally provide a filename.",
            verbose=True,
        )

    def run(self, filename: str = "screenshot.png") -> str:
        """Take a screenshot (sync wrapper).

        Args:
            filename: Output path; ".png" is appended if missing.

        Returns:
            Success message with the saved path, or an error message.
        """
        return asyncio.run(self._async_run(filename))

    async def _async_run(self, filename: str) -> str:
        """Capture the current page to ``filename`` via Playwright."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            # Ensure .png extension
            if not filename.endswith(".png"):
                filename += ".png"

            # Stagehand wraps Playwright; screenshot() lives on the
            # underlying Playwright page object.
            playwright_page = page.page
            await playwright_page.screenshot(path=filename)

            # BUG FIX: previously returned the literal "(unknown)" instead
            # of the actual filename.
            return f"Screenshot saved to {filename}"
        except Exception as e:
            logger.error(f"Screenshot error: {str(e)}")
            return f"Failed to take screenshot: {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
class CloseBrowserTool(BaseTool):
    """Tool for closing the browser."""

    def __init__(self):
        super().__init__(
            name="close_browser",
            description="Close the browser when done with automation tasks",
            verbose=True,
        )

    def run(self, *args) -> str:
        """Close the browser (sync wrapper; ignores any arguments)."""
        return asyncio.run(self._async_run())

    async def _async_run(self) -> str:
        """Tear down the shared browser session."""
        try:
            await browser_state.close()
        except Exception as e:
            logger.error(f"Close browser error: {str(e)}")
            return f"Failed to close browser: {str(e)}"
        return "Browser closed successfully"
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    # One tool instance per browser capability.
    navigate_tool = NavigateTool()
    act_tool = ActTool()
    extract_tool = ExtractTool()
    observe_tool = ObserveTool()
    screenshot_tool = ScreenshotTool()
    close_browser_tool = CloseBrowserTool()

    # Wire the tools into a standard Swarms agent.
    browser_agent = Agent(
        agent_name="BrowserAutomationAgent",
        model_name="gpt-4o-mini",
        max_loops=1,
        tools=[
            navigate_tool,
            act_tool,
            extract_tool,
            observe_tool,
            screenshot_tool,
            close_browser_tool,
        ],
        system_prompt="""You are a web browser automation specialist. You can:
1. Navigate to websites using the navigate_browser tool
2. Perform actions like clicking and typing using the browser_act tool
3. Extract information from pages using the browser_extract tool
4. Find and observe elements using the browser_observe tool
5. Take screenshots using the browser_screenshot tool
6. Close the browser when done using the close_browser tool

Always start by navigating to a URL before trying to interact with a page.
Be specific in your actions and extractions. When done with tasks, close the browser.""",
    )

    separator = "\n" + "=" * 50 + "\n"

    # Example 1: Research task
    print("Example 1: Automated web research")
    print(
        browser_agent.run(
            "Go to hackernews (news.ycombinator.com) and extract the titles of the top 5 stories. Then take a screenshot."
        )
    )
    print(separator)

    # Example 2: Search task
    print("Example 2: Perform a web search")
    print(
        browser_agent.run(
            "Navigate to google.com, search for 'Python web scraping best practices', and extract the first 3 search result titles"
        )
    )
    print(separator)

    # Example 3: Form interaction
    print("Example 3: Interact with a form")
    print(
        browser_agent.run(
            "Go to example.com and observe what elements are on the page. Then extract all the text content."
        )
    )

    # Clean up
    browser_agent.run("Close the browser")
|
@ -0,0 +1,252 @@
|
|||||||
|
"""
|
||||||
|
Stagehand MCP Server Integration with Swarms
|
||||||
|
============================================
|
||||||
|
|
||||||
|
This example demonstrates how to use the Stagehand MCP (Model Context Protocol)
|
||||||
|
server with Swarms agents. The MCP server provides browser automation capabilities
|
||||||
|
as standardized tools that can be discovered and used by agents.
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
1. Install and run the Stagehand MCP server:
|
||||||
|
cd stagehand-mcp-server
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
npm start
|
||||||
|
|
||||||
|
2. The server will start on http://localhost:3000/sse
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Automatic tool discovery from MCP server
|
||||||
|
- Multi-session browser management
|
||||||
|
- Built-in screenshot resources
|
||||||
|
- Prompt templates for common tasks
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from swarms import Agent
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class StagehandMCPAgent:
    """
    A Swarms agent that connects to the Stagehand MCP server
    for browser automation capabilities.
    """

    def __init__(
        self,
        agent_name: str = "StagehandMCPAgent",
        mcp_server_url: str = "http://localhost:3000/sse",
        model_name: str = "gpt-4o-mini",
        max_loops: int = 1,
    ):
        """
        Initialize the Stagehand MCP Agent.

        Args:
            agent_name: Name of the agent.
            mcp_server_url: URL of the Stagehand MCP server.
            model_name: LLM model to use.
            max_loops: Maximum number of reasoning loops.
        """
        # The system prompt advertises the tool surface exposed by the
        # MCP server so the LLM knows what it can call.
        system_prompt = """You are a web browser automation specialist with access to Stagehand MCP tools.

Available tools from the MCP server:
- navigate: Navigate to a URL
- act: Perform actions on web pages (click, type, etc.)
- extract: Extract data from web pages
- observe: Find and observe elements on pages
- screenshot: Take screenshots
- createSession: Create new browser sessions for parallel tasks
- listSessions: List active browser sessions
- closeSession: Close browser sessions

For multi-page workflows, you can create multiple sessions.
Always be specific in your actions and extractions.
Remember to close sessions when done with them."""

        self.agent = Agent(
            agent_name=agent_name,
            model_name=model_name,
            max_loops=max_loops,
            # Tool discovery happens over the MCP connection.
            mcp_url=mcp_server_url,
            system_prompt=system_prompt,
            verbose=True,
        )

    def run(self, task: str) -> str:
        """Run a browser automation task."""
        return self.agent.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
class MultiSessionBrowserSwarm:
    """
    A multi-agent swarm that uses multiple browser sessions
    for parallel web automation tasks.
    """

    def __init__(
        self,
        mcp_server_url: str = "http://localhost:3000/sse",
        num_agents: int = 3,
    ):
        """
        Initialize a swarm of browser automation agents.

        Args:
            mcp_server_url: URL of the Stagehand MCP server.
            num_agents: Number of agents to create. Silently capped at
                the number of available role specializations (3).
        """
        self.agents = []

        # Create specialized agents for different tasks
        agent_roles = [
            ("DataExtractor", "You specialize in extracting structured data from websites."),
            ("FormFiller", "You specialize in filling out forms and interacting with web applications."),
            ("WebMonitor", "You specialize in monitoring websites for changes and capturing screenshots."),
        ]

        for i in range(min(num_agents, len(agent_roles))):
            name, specialization = agent_roles[i]
            agent = Agent(
                agent_name=f"{name}_{i}",
                model_name="gpt-4o-mini",
                max_loops=1,
                mcp_url=mcp_server_url,
                system_prompt=f"""You are a web browser automation specialist. {specialization}

You have access to Stagehand MCP tools including:
- createSession: Create a new browser session
- navigate_session: Navigate to URLs in a specific session
- act_session: Perform actions in a specific session
- extract_session: Extract data from a specific session
- observe_session: Observe elements in a specific session
- closeSession: Close a session when done

Always create your own session for tasks to work independently from other agents.""",
                verbose=True,
            )
            self.agents.append(agent)

    def distribute_tasks(self, tasks: List[str]) -> List[str]:
        """Distribute tasks among agents round-robin.

        Args:
            tasks: Task descriptions to execute sequentially.

        Returns:
            Results in the same order as ``tasks``.

        Raises:
            RuntimeError: If the swarm has no agents (e.g. constructed
                with ``num_agents=0``) — previously this crashed with
                ZeroDivisionError on ``i % len(self.agents)``.
        """
        if not self.agents:
            raise RuntimeError("No agents available to execute tasks.")

        results = []

        # Round-robin assignment over the available agents.
        for i, task in enumerate(tasks):
            agent = self.agents[i % len(self.agents)]

            logger.info(f"Assigning task to {agent.agent_name}: {task}")
            results.append(agent.run(task))

        return results
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    banner = "=" * 70

    print(banner)
    print("Stagehand MCP Server Integration Examples")
    print(banner)
    print("\nMake sure the Stagehand MCP server is running on http://localhost:3000/sse")
    print("Run: cd stagehand-mcp-server && npm start\n")

    # Example 1: Single agent with MCP tools
    print("\nExample 1: Single Agent with MCP Tools")
    print("-" * 40)

    mcp_agent = StagehandMCPAgent(
        agent_name="WebResearchAgent",
        mcp_server_url="http://localhost:3000/sse",
    )

    # Research task using MCP tools
    result1 = mcp_agent.run(
        """Navigate to news.ycombinator.com and extract the following:
    1. The titles of the top 5 stories
    2. Their points/scores
    3. Number of comments for each
    Then take a screenshot of the page."""
    )
    print(f"Result: {result1}")

    print("\n" + banner + "\n")

    # Example 2: Multi-session parallel browsing
    print("Example 2: Multi-Session Parallel Browsing")
    print("-" * 40)

    parallel_agent = StagehandMCPAgent(
        agent_name="ParallelBrowserAgent",
        mcp_server_url="http://localhost:3000/sse",
    )

    result2 = parallel_agent.run(
        """Create 3 browser sessions and perform these tasks in parallel:
    1. Session 1: Go to github.com/trending and extract the top 3 trending repositories
    2. Session 2: Go to reddit.com/r/programming and extract the top 3 posts
    3. Session 3: Go to stackoverflow.com and extract the featured questions

    After extracting data from all sessions, close them."""
    )
    print(f"Result: {result2}")

    print("\n" + banner + "\n")

    # Example 3: Multi-agent browser swarm
    print("Example 3: Multi-Agent Browser Swarm")
    print("-" * 40)

    # Create a swarm of specialized browser agents
    browser_swarm = MultiSessionBrowserSwarm(
        mcp_server_url="http://localhost:3000/sse",
        num_agents=3,
    )

    # Define tasks for the swarm
    swarm_tasks = [
        "Create a session, navigate to python.org, and extract information about the latest Python version and its key features",
        "Create a session, go to npmjs.com, search for 'stagehand', and extract information about the package including version and description",
        "Create a session, visit playwright.dev, and extract the main features and benefits listed on the homepage",
    ]

    print("Distributing tasks to browser swarm...")
    swarm_results = browser_swarm.distribute_tasks(swarm_tasks)

    for i, result in enumerate(swarm_results):
        print(f"\nTask {i+1} Result: {result}")

    print("\n" + banner + "\n")

    # Example 4: Complex workflow with session management
    print("Example 4: Complex Multi-Page Workflow")
    print("-" * 40)

    workflow_agent = StagehandMCPAgent(
        agent_name="WorkflowAgent",
        mcp_server_url="http://localhost:3000/sse",
        max_loops=2,  # Allow more complex reasoning
    )

    result4 = workflow_agent.run(
        """Perform a comprehensive analysis of AI frameworks:
    1. Create a new session
    2. Navigate to github.com/huggingface/transformers and extract the star count and latest release info
    3. In the same session, navigate to github.com/openai/gpt-3 and extract similar information
    4. Navigate to github.com/anthropics/anthropic-sdk-python and extract repository statistics
    5. Take screenshots of each repository page
    6. Compile a comparison report of all three repositories
    7. Close the session when done"""
    )
    print(f"Result: {result4}")

    print("\n" + banner)
    print("All examples completed!")
    print(banner)
|
@ -0,0 +1,359 @@
|
|||||||
|
"""
|
||||||
|
Stagehand Multi-Agent Browser Automation Workflows
|
||||||
|
=================================================
|
||||||
|
|
||||||
|
This example demonstrates advanced multi-agent workflows using Stagehand
|
||||||
|
for complex browser automation scenarios. It shows how multiple agents
|
||||||
|
can work together to accomplish sophisticated web tasks.
|
||||||
|
|
||||||
|
Use cases:
|
||||||
|
1. E-commerce price monitoring across multiple sites
|
||||||
|
2. Competitive analysis and market research
|
||||||
|
3. Automated testing and validation workflows
|
||||||
|
4. Data aggregation from multiple sources
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow
|
||||||
|
from swarms.structs.agent_rearrange import AgentRearrange
|
||||||
|
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# Pydantic models for structured data
|
||||||
|
class ProductInfo(BaseModel):
    """Product information schema.

    Captures the attributes scraped for a single product listing, plus an
    optional path to a screenshot of the listing page.
    """

    name: str = Field(..., description="Product name")
    price: float = Field(..., description="Product price")
    availability: str = Field(..., description="Availability status")
    url: str = Field(..., description="Product URL")
    # Optional because not every scraping run captures a screenshot.
    screenshot_path: Optional[str] = Field(None, description="Screenshot file path")
|
||||||
|
|
||||||
|
|
||||||
|
class MarketAnalysis(BaseModel):
    """Market analysis report schema.

    Aggregates the products collected by the scraper agents together with a
    summary price range and free-form recommendations.
    """

    # Report creation time; naive local time via datetime.now —
    # TODO confirm whether downstream consumers expect UTC.
    timestamp: datetime = Field(default_factory=datetime.now)
    products: List[ProductInfo] = Field(..., description="List of products analyzed")
    # Presumably keys are "min"/"max" — verify against producers of this model.
    price_range: Dict[str, float] = Field(..., description="Min and max prices")
    recommendations: List[str] = Field(..., description="Analysis recommendations")
|
||||||
|
|
||||||
|
|
||||||
|
# Specialized browser agents
|
||||||
|
class ProductScraperAgent(StagehandAgent):
    """Specialized agent for scraping product information.

    Thin subclass of :class:`StagehandAgent` that derives its agent name from
    the target site, e.g. ``ProductScraper_Amazon``.
    """

    def __init__(self, site_name: str, *args, **kwargs):
        """
        Args:
            site_name: Name of the site this scraper targets; used to build
                the default ``agent_name``.
            *args, **kwargs: Forwarded to ``StagehandAgent.__init__``.
        """
        # setdefault lets an explicit agent_name passed by the caller win;
        # the original form super().__init__(agent_name=..., *args, **kwargs)
        # raised TypeError on any caller-supplied agent_name (duplicate kwarg).
        kwargs.setdefault("agent_name", f"ProductScraper_{site_name}")
        super().__init__(*args, **kwargs)
        self.site_name = site_name
|
||||||
|
|
||||||
|
|
||||||
|
class PriceMonitorAgent(StagehandAgent):
    """Specialized agent for monitoring price changes."""

    def __init__(self, *args, **kwargs):
        # setdefault lets callers override the default agent name; the
        # original form (agent_name=... plus **kwargs) raised TypeError
        # whenever the caller also supplied agent_name.
        kwargs.setdefault("agent_name", "PriceMonitorAgent")
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# Example 1: E-commerce Price Comparison Workflow
|
||||||
|
def create_price_comparison_workflow():
    """
    Create a workflow that compares prices across multiple e-commerce sites.

    Two StagehandAgent instances scrape Amazon and eBay concurrently; their
    combined output is then handed to an LLM analysis agent. Returns a
    SequentialWorkflow whose stages are [concurrent scraping, analysis].
    """
    # One browser agent per marketplace, identically configured.
    scrapers = [
        StagehandAgent(
            agent_name=scraper_name,
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        for scraper_name in ("AmazonScraperAgent", "EbayScraperAgent")
    ]

    # LLM-only agent that interprets the scraped prices.
    price_analyst = Agent(
        agent_name="PriceAnalysisAgent",
        model_name="gpt-4o-mini",
        system_prompt="""You are a price analysis expert. Analyze product prices from multiple sources
        and provide insights on the best deals, price trends, and recommendations.
        Focus on value for money and highlight any significant price differences.""",
    )

    # Scrape both sites in parallel...
    parallel_scrape = ConcurrentWorkflow(
        agents=scrapers,
        max_loops=1,
        verbose=True,
    )

    # ...then analyze the combined results.
    return SequentialWorkflow(
        agents=[parallel_scrape, price_analyst],
        max_loops=1,
        verbose=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Example 2: Competitive Analysis Workflow
|
||||||
|
def create_competitive_analysis_workflow():
    """
    Create a workflow for competitive analysis across multiple company websites.

    Pipeline: research each company's site, then analyze its social media
    presence, then compile a report. Returns an AgentRearrange workflow.
    """
    # Agent for extracting company information
    company_researcher = StagehandAgent(
        agent_name="CompanyResearchAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for analyzing social media presence
    social_media_agent = StagehandAgent(
        agent_name="SocialMediaAnalysisAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for compiling competitive analysis report
    report_compiler = Agent(
        agent_name="CompetitiveAnalysisReporter",
        model_name="gpt-4o-mini",
        system_prompt="""You are a competitive analysis expert. Compile comprehensive reports
        based on company information and social media presence data. Identify strengths,
        weaknesses, and market positioning for each company.""",
    )

    # AgentRearrange resolves flow steps by each agent's agent_name, not by
    # the local Python variable name, so the pattern must use the names
    # configured above. (The original used the variable names, which match
    # no registered agent.)
    workflow_pattern = (
        "CompanyResearchAgent -> SocialMediaAnalysisAgent -> CompetitiveAnalysisReporter"
    )

    competitive_workflow = AgentRearrange(
        agents=[company_researcher, social_media_agent, report_compiler],
        flow=workflow_pattern,
        verbose=True,
    )

    return competitive_workflow
|
||||||
|
|
||||||
|
|
||||||
|
# Example 3: Automated Testing Workflow
|
||||||
|
def create_automated_testing_workflow():
    """
    Create a workflow for automated web application testing.

    Three browser agents (UI, form validation, accessibility) run their
    checks concurrently; a reporter agent then compiles the results.
    Returns a SequentialWorkflow of [concurrent testing, reporting].
    """
    # Identically-configured browser agents, one per testing concern.
    testers = [
        StagehandAgent(
            agent_name=tester_name,
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        for tester_name in (
            "UITestingAgent",
            "FormValidationAgent",
            "AccessibilityTestingAgent",
        )
    ]

    # LLM-only agent that summarizes the test output.
    test_reporter = Agent(
        agent_name="TestReportCompiler",
        model_name="gpt-4o-mini",
        system_prompt="""You are a QA test report specialist. Compile test results from
        UI, form validation, and accessibility testing into a comprehensive report.
        Highlight any failures, warnings, and provide recommendations for fixes.""",
    )

    # Run all testers concurrently, then generate the report.
    parallel_tests = ConcurrentWorkflow(
        agents=testers,
        max_loops=1,
        verbose=True,
    )

    return SequentialWorkflow(
        agents=[parallel_tests, test_reporter],
        max_loops=1,
        verbose=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Example 4: News Aggregation and Sentiment Analysis
|
||||||
|
def create_news_aggregation_workflow():
    """
    Create a workflow for news aggregation and sentiment analysis.

    Scrapes three tech-news sources concurrently, then runs sentiment
    analysis followed by trend identification over the aggregate.
    """
    # Source names drive the scraper agent names; the URLs are reference
    # data only — the agents receive the targets via their run() prompt.
    news_sites = [
        ("TechCrunch", "https://techcrunch.com"),
        ("HackerNews", "https://news.ycombinator.com"),
        ("Reddit", "https://reddit.com/r/technology"),
    ]

    news_scrapers = [
        StagehandAgent(
            agent_name=f"{site_name}Scraper",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        for site_name, _url in news_sites
    ]

    # Sentiment analysis agent
    sentiment_analyzer = Agent(
        agent_name="SentimentAnalyzer",
        model_name="gpt-4o-mini",
        system_prompt="""You are a sentiment analysis expert. Analyze news articles and posts
        to determine overall sentiment (positive, negative, neutral) and identify key themes
        and trends in the technology sector.""",
    )

    # Trend identification agent
    trend_identifier = Agent(
        agent_name="TrendIdentifier",
        model_name="gpt-4o-mini",
        system_prompt="""You are a trend analysis expert. Based on aggregated news and sentiment
        data, identify emerging trends, hot topics, and potential market movements in the
        technology sector.""",
    )

    # Parallel scraping feeds the two sequential analysis stages.
    scraping_stage = ConcurrentWorkflow(
        agents=news_scrapers,
        max_loops=1,
        verbose=True,
    )

    return SequentialWorkflow(
        agents=[scraping_stage, sentiment_analyzer, trend_identifier],
        max_loops=1,
        verbose=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Main execution examples
def _cleanup_browser_agents(workflow) -> None:
    """Close the browser of every StagehandAgent reachable from *workflow*.

    Recurses one level into nested workflows (objects exposing ``.agents``),
    which is how a ConcurrentWorkflow stage appears inside a
    SequentialWorkflow. Workflows without an ``agents`` attribute are a no-op.
    """
    for agent in getattr(workflow, "agents", []):
        if isinstance(agent, StagehandAgent):
            agent.cleanup()
        elif hasattr(agent, "agents"):  # nested workflow stage
            for sub_agent in agent.agents:
                if isinstance(sub_agent, StagehandAgent):
                    sub_agent.cleanup()


if __name__ == "__main__":
    print("=" * 70)
    print("Stagehand Multi-Agent Workflow Examples")
    print("=" * 70)

    # Example 1: Price Comparison
    print("\nExample 1: E-commerce Price Comparison")
    print("-" * 40)

    price_workflow = create_price_comparison_workflow()

    # Search for a specific product across multiple sites
    price_result = price_workflow.run(
        """Search for 'iPhone 15 Pro Max 256GB' on:
        1. Amazon - extract price, availability, and seller information
        2. eBay - extract price range, number of listings, and average price
        Take screenshots of search results from both sites.
        Compare the prices and provide recommendations on where to buy."""
    )
    print(f"Price Comparison Result:\n{price_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 2: Competitive Analysis
    print("Example 2: Competitive Analysis")
    print("-" * 40)

    competitive_workflow = create_competitive_analysis_workflow()

    competitive_result = competitive_workflow.run(
        """Analyze these three AI companies:
        1. OpenAI - visit openai.com and extract mission, products, and recent announcements
        2. Anthropic - visit anthropic.com and extract their AI safety approach and products
        3. DeepMind - visit deepmind.com and extract research focus and achievements

        Then check their Twitter/X presence and recent posts.
        Compile a competitive analysis report comparing their market positioning."""
    )
    print(f"Competitive Analysis Result:\n{competitive_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 3: Automated Testing
    print("Example 3: Automated Web Testing")
    print("-" * 40)

    testing_workflow = create_automated_testing_workflow()

    test_result = testing_workflow.run(
        """Test the website example.com:
        1. UI Testing: Check if all main navigation links work, images load, and layout is responsive
        2. Form Testing: If there are any forms, test with valid and invalid inputs
        3. Accessibility: Check for alt texts, ARIA labels, and keyboard navigation

        Take screenshots of any issues found and compile a comprehensive test report."""
    )
    print(f"Test Results:\n{test_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 4: News Aggregation
    print("Example 4: Tech News Aggregation and Analysis")
    print("-" * 40)

    news_workflow = create_news_aggregation_workflow()

    news_result = news_workflow.run(
        """For each news source:
        1. TechCrunch: Extract the top 5 headlines about AI or machine learning
        2. HackerNews: Extract the top 5 posts related to AI/ML with most points
        3. Reddit r/technology: Extract top 5 posts about AI from the past week

        Analyze sentiment and identify emerging trends in AI technology."""
    )
    print(f"News Analysis Result:\n{news_result}")

    # Cleanup all browser instances. The original version only cleaned
    # price_workflow; every workflow that holds StagehandAgents must be
    # cleaned or its browser processes leak.
    print("\n" + "=" * 70)
    print("Cleaning up browser instances...")

    for wf in (price_workflow, competitive_workflow, testing_workflow, news_workflow):
        _cleanup_browser_agents(wf)

    print("All workflows completed!")
    print("=" * 70)
|
@ -0,0 +1,249 @@
|
|||||||
|
# Stagehand Browser Automation Integration for Swarms
|
||||||
|
|
||||||
|
This directory contains examples demonstrating how to integrate [Stagehand](https://github.com/browserbase/stagehand), an AI-powered browser automation framework, with the Swarms multi-agent framework.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Stagehand provides natural language browser automation capabilities that can be seamlessly integrated into Swarms agents. This integration enables:
|
||||||
|
|
||||||
|
- 🌐 **Natural Language Web Automation**: Use simple commands like "click the submit button" or "extract product prices"
|
||||||
|
- 🤖 **Multi-Agent Browser Workflows**: Multiple agents can automate different websites simultaneously
|
||||||
|
- 🔧 **Flexible Integration Options**: Use as a wrapped agent, individual tools, or via MCP server
|
||||||
|
- 📊 **Complex Automation Scenarios**: E-commerce monitoring, competitive analysis, automated testing, and more
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### 1. Stagehand Wrapper Agent (`1_stagehand_wrapper_agent.py`)
|
||||||
|
|
||||||
|
The simplest integration - wraps Stagehand as a Swarms-compatible agent.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
|
||||||
|
|
||||||
|
# Create a browser automation agent
|
||||||
|
browser_agent = StagehandAgent(
|
||||||
|
agent_name="WebScraperAgent",
|
||||||
|
model_name="gpt-4o-mini",
|
||||||
|
env="LOCAL", # or "BROWSERBASE" for cloud execution
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use natural language to control the browser
|
||||||
|
result = browser_agent.run(
|
||||||
|
"Navigate to news.ycombinator.com and extract the top 5 story titles"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Inherits from Swarms `Agent` base class
|
||||||
|
- Automatic browser lifecycle management
|
||||||
|
- Natural language task interpretation
|
||||||
|
- Support for both local (Playwright) and cloud (Browserbase) execution
|
||||||
|
|
||||||
|
### 2. Stagehand as Tools (`2_stagehand_tools_agent.py`)
|
||||||
|
|
||||||
|
Provides fine-grained control by exposing Stagehand methods as individual tools.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from swarms import Agent
|
||||||
|
from examples.stagehand.stagehand_tools_agent import (
|
||||||
|
NavigateTool, ActTool, ExtractTool, ObserveTool, ScreenshotTool
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create agent with browser tools
|
||||||
|
browser_agent = Agent(
|
||||||
|
agent_name="BrowserAutomationAgent",
|
||||||
|
model_name="gpt-4o-mini",
|
||||||
|
tools=[
|
||||||
|
NavigateTool(),
|
||||||
|
ActTool(),
|
||||||
|
ExtractTool(),
|
||||||
|
ObserveTool(),
|
||||||
|
ScreenshotTool(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Agent can now use tools strategically
|
||||||
|
result = browser_agent.run(
|
||||||
|
"Go to google.com, search for 'Python tutorials', and extract the first 3 results"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Available Tools:**
|
||||||
|
- `NavigateTool`: Navigate to URLs
|
||||||
|
- `ActTool`: Perform actions (click, type, scroll)
|
||||||
|
- `ExtractTool`: Extract data from pages
|
||||||
|
- `ObserveTool`: Find elements on pages
|
||||||
|
- `ScreenshotTool`: Capture screenshots
|
||||||
|
- `CloseBrowserTool`: Clean up browser resources
|
||||||
|
|
||||||
|
### 3. Stagehand MCP Server (`3_stagehand_mcp_agent.py`)
|
||||||
|
|
||||||
|
Integrates with Stagehand's Model Context Protocol (MCP) server for standardized tool access.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from examples.stagehand.stagehand_mcp_agent import StagehandMCPAgent
|
||||||
|
|
||||||
|
# Connect to Stagehand MCP server
|
||||||
|
mcp_agent = StagehandMCPAgent(
|
||||||
|
agent_name="WebResearchAgent",
|
||||||
|
mcp_server_url="http://localhost:3000/sse",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use MCP tools including multi-session management
|
||||||
|
result = mcp_agent.run("""
|
||||||
|
Create 3 browser sessions and:
|
||||||
|
1. Session 1: Check Python.org for latest version
|
||||||
|
2. Session 2: Check PyPI for trending packages
|
||||||
|
3. Session 3: Check GitHub Python trending repos
|
||||||
|
Compile a Python ecosystem status report.
|
||||||
|
""")
|
||||||
|
```
|
||||||
|
|
||||||
|
**MCP Features:**
|
||||||
|
- Automatic tool discovery
|
||||||
|
- Multi-session browser management
|
||||||
|
- Built-in screenshot resources
|
||||||
|
- Prompt templates for common tasks
|
||||||
|
|
||||||
|
### 4. Multi-Agent Workflows (`4_stagehand_multi_agent_workflow.py`)
|
||||||
|
|
||||||
|
Demonstrates complex multi-agent browser automation scenarios.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from examples.stagehand.stagehand_multi_agent_workflow import (
|
||||||
|
create_price_comparison_workflow,
|
||||||
|
create_competitive_analysis_workflow,
|
||||||
|
create_automated_testing_workflow,
|
||||||
|
create_news_aggregation_workflow
|
||||||
|
)
|
||||||
|
|
||||||
|
# Price comparison across multiple e-commerce sites
|
||||||
|
price_workflow = create_price_comparison_workflow()
|
||||||
|
result = price_workflow.run(
|
||||||
|
"Compare prices for iPhone 15 Pro on Amazon and eBay"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Competitive analysis of multiple companies
|
||||||
|
competitive_workflow = create_competitive_analysis_workflow()
|
||||||
|
result = competitive_workflow.run(
|
||||||
|
"Analyze OpenAI, Anthropic, and DeepMind websites and social media"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Workflow Examples:**
|
||||||
|
- **E-commerce Monitoring**: Track prices across multiple sites
|
||||||
|
- **Competitive Analysis**: Research competitors' websites and social media
|
||||||
|
- **Automated Testing**: UI, form validation, and accessibility testing
|
||||||
|
- **News Aggregation**: Collect and analyze news from multiple sources
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
1. **Install Swarms and Stagehand:**
|
||||||
|
```bash
|
||||||
|
pip install swarms stagehand
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Set up environment variables:**
|
||||||
|
```bash
|
||||||
|
# For local browser automation (using Playwright)
|
||||||
|
export OPENAI_API_KEY="your-openai-key"
|
||||||
|
|
||||||
|
# For cloud browser automation (using Browserbase)
|
||||||
|
export BROWSERBASE_API_KEY="your-browserbase-key"
|
||||||
|
export BROWSERBASE_PROJECT_ID="your-project-id"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **For MCP Server examples:**
|
||||||
|
```bash
|
||||||
|
# Install and run the Stagehand MCP server
|
||||||
|
cd stagehand-mcp-server
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
npm start
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use Cases
|
||||||
|
|
||||||
|
### E-commerce Automation
|
||||||
|
- Price monitoring and comparison
|
||||||
|
- Inventory tracking
|
||||||
|
- Automated purchasing workflows
|
||||||
|
- Review aggregation
|
||||||
|
|
||||||
|
### Research and Analysis
|
||||||
|
- Competitive intelligence gathering
|
||||||
|
- Market research automation
|
||||||
|
- Social media monitoring
|
||||||
|
- News and trend analysis
|
||||||
|
|
||||||
|
### Quality Assurance
|
||||||
|
- Automated UI testing
|
||||||
|
- Cross-browser compatibility testing
|
||||||
|
- Form validation testing
|
||||||
|
- Accessibility compliance checking
|
||||||
|
|
||||||
|
### Data Collection
|
||||||
|
- Web scraping at scale
|
||||||
|
- Real-time data monitoring
|
||||||
|
- Structured data extraction
|
||||||
|
- Screenshot documentation
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **Resource Management**: Always clean up browser instances when done
|
||||||
|
```python
|
||||||
|
browser_agent.cleanup() # For wrapper agents
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Error Handling**: Stagehand includes self-healing capabilities, but wrap critical operations in try-except blocks
|
||||||
|
|
||||||
|
3. **Parallel Execution**: Use `ConcurrentWorkflow` for simultaneous browser automation across multiple sites
|
||||||
|
|
||||||
|
4. **Session Management**: For complex multi-page workflows, use the MCP server's session management capabilities
|
||||||
|
|
||||||
|
5. **Rate Limiting**: Be respectful of websites - add delays between requests when necessary
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
Run the test suite to verify the integration:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/stagehand/test_stagehand_integration.py -v
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **Browser not starting**: Ensure Playwright is properly installed
|
||||||
|
```bash
|
||||||
|
playwright install
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **MCP connection failed**: Verify the MCP server is running on the correct port
|
||||||
|
|
||||||
|
3. **Timeout errors**: Increase timeout in StagehandConfig or agent initialization
|
||||||
|
|
||||||
|
### Debug Mode
|
||||||
|
|
||||||
|
Enable verbose logging:
|
||||||
|
```python
|
||||||
|
agent = StagehandAgent(
|
||||||
|
agent_name="DebugAgent",
|
||||||
|
verbose=True, # Enable detailed logging
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
We welcome contributions! Please:
|
||||||
|
1. Follow the existing code style
|
||||||
|
2. Add tests for new features
|
||||||
|
3. Update documentation
|
||||||
|
4. Submit PRs with clear descriptions
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
These examples are provided under the same license as the Swarms framework. Stagehand is licensed separately - see [Stagehand's repository](https://github.com/browserbase/stagehand) for details.
|
@ -0,0 +1,13 @@
|
|||||||
|
# Requirements for Stagehand integration examples
|
||||||
|
swarms>=8.0.0
|
||||||
|
stagehand>=0.1.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
pydantic>=2.0.0
|
||||||
|
loguru>=0.7.0
|
||||||
|
|
||||||
|
# For MCP server examples (optional)
|
||||||
|
httpx>=0.24.0
|
||||||
|
|
||||||
|
# For testing
|
||||||
|
pytest>=7.0.0
|
||||||
|
pytest-asyncio>=0.21.0
|
@ -0,0 +1,356 @@
|
|||||||
|
"""
|
||||||
|
Tests for Stagehand Integration with Swarms
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
This module contains tests for the Stagehand browser automation
|
||||||
|
integration with the Swarms framework.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
from swarms import Agent
|
||||||
|
from swarms.tools.base_tool import BaseTool
|
||||||
|
|
||||||
|
|
||||||
|
# Mock Stagehand classes
|
||||||
|
class MockObserveResult:
    """Lightweight stand-in for a Stagehand ``observe()`` result entry.

    Carries the three attributes the integration code reads: a human-readable
    description, a CSS selector, and the suggested interaction method.
    """

    def __init__(self, description, selector, method="click"):
        self.description, self.selector, self.method = description, selector, method
|
||||||
|
|
||||||
|
|
||||||
|
class MockStagehandPage:
    """Fake Stagehand page implementing the async surface the agents call."""

    async def goto(self, url):
        # Navigation is a no-op in tests; returns None like the real API.
        return None

    async def act(self, action):
        # Echo the requested action back in a fixed message format.
        return "Performed action: {}".format(action)

    async def extract(self, query):
        # Canned extraction payload echoing the query.
        return {"extracted": query, "data": ["item1", "item2"]}

    async def observe(self, query):
        # Two canned elements: a search box and a submit button.
        results = [
            MockObserveResult("Search box", "#search-input"),
            MockObserveResult("Submit button", "#submit-btn"),
        ]
        return results
|
||||||
|
|
||||||
|
|
||||||
|
class MockStagehand:
    """Fake Stagehand client: records its config and exposes a mock page."""

    def __init__(self, config):
        # Keep the config for assertions; the page mimics the real attribute.
        self.config = config
        self.page = MockStagehandPage()

    async def init(self):
        # No real browser to launch.
        pass

    async def close(self):
        # No real browser to tear down.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Test StagehandAgent wrapper
|
||||||
|
class TestStagehandAgent:
    """Test the StagehandAgent wrapper class.

    Each test patches the wrapper module's ``Stagehand`` symbol with
    ``MockStagehand`` so no real browser is started.
    """

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_agent_initialization(self):
        """Test that StagehandAgent initializes correctly."""
        # Imported inside the test so construction happens under the patch.
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Config is stored but the browser is lazily initialized.
        assert agent.agent_name == "TestAgent"
        assert agent.stagehand_config.env == "LOCAL"
        assert agent.stagehand_config.model_name == "gpt-4o-mini"
        assert not agent._initialized

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_navigation_task(self):
        """Test navigation and extraction task."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run("Navigate to example.com and extract the main content")

        # Parse result — run() returns a JSON string describing the outcome.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert "navigated_to" in result_data["data"]
        # The wrapper normalizes the bare domain to an https URL.
        assert result_data["data"]["navigated_to"] == "https://example.com"
        assert "extracted" in result_data["data"]

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_search_task(self):
        """Test search functionality."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run("Go to google.com and search for 'test query'")

        # The wrapper reports the parsed query and classifies the action.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert result_data["data"]["search_query"] == "test query"
        assert result_data["action"] == "search"

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_cleanup(self):
        """Test that cleanup properly closes browser."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Initialize the agent — first run() lazily starts the (mock) browser.
        agent.run("Navigate to example.com")
        assert agent._initialized

        # Cleanup
        agent.cleanup()

        # After cleanup, should be able to run again (re-initializes).
        result = agent.run("Navigate to example.com")
        assert result is not None
|
||||||
|
|
||||||
|
|
||||||
|
# Test Stagehand Tools
|
||||||
|
class TestStagehandTools:
    """Test individual Stagehand tools.

    Each test patches the tools module's shared ``browser_state`` so the
    tools operate against an AsyncMock page instead of a real browser.
    All tests are async and therefore carry ``@pytest.mark.asyncio`` —
    without the marker, pytest-asyncio's default strict mode does not run
    async test functions.
    """

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_navigate_tool(self, mock_browser_state):
        """Test NavigateTool functionality."""
        from examples.stagehand.stagehand_tools_agent import NavigateTool

        # Setup mock
        mock_page = AsyncMock()
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = NavigateTool()
        result = await tool._async_run("https://example.com")

        assert "Successfully navigated to https://example.com" in result
        mock_page.goto.assert_called_once_with("https://example.com")

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_act_tool(self, mock_browser_state):
        """Test ActTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ActTool

        # Setup mock
        mock_page = AsyncMock()
        mock_page.act = AsyncMock(return_value="Action completed")
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = ActTool()
        result = await tool._async_run("click the button")

        # The tool is expected to prefix its own "Action performed" message
        # around the page's raw return value.
        assert "Action performed" in result
        assert "click the button" in result
        mock_page.act.assert_called_once_with("click the button")

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_extract_tool(self, mock_browser_state):
        """Test ExtractTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ExtractTool

        # Setup mock
        mock_page = AsyncMock()
        mock_page.extract = AsyncMock(return_value={"title": "Test Page", "content": "Test content"})
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = ExtractTool()
        result = await tool._async_run("extract the page title")

        # Result should be JSON string
        parsed_result = json.loads(result)
        assert parsed_result["title"] == "Test Page"
        assert parsed_result["content"] == "Test content"

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_observe_tool(self, mock_browser_state):
        """Test ObserveTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ObserveTool

        # Setup mock
        mock_page = AsyncMock()
        mock_observations = [
            MockObserveResult("Search input", "#search"),
            MockObserveResult("Submit button", "#submit"),
        ]
        mock_page.observe = AsyncMock(return_value=mock_observations)
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = ObserveTool()
        result = await tool._async_run("find the search box")

        # Result should be JSON string
        parsed_result = json.loads(result)
        assert len(parsed_result) == 2
        assert parsed_result[0]["description"] == "Search input"
        assert parsed_result[0]["selector"] == "#search"
||||||
|
|
||||||
|
# Test MCP integration
|
||||||
|
class TestStagehandMCP:
    """Test Stagehand MCP server integration."""

    def test_mcp_agent_initialization(self):
        """The wrapper should forward its settings onto the inner Swarms agent."""
        from examples.stagehand.stagehand_mcp_agent import StagehandMCPAgent

        wrapper = StagehandMCPAgent(
            agent_name="TestMCPAgent",
            mcp_server_url="http://localhost:3000/sse",
            model_name="gpt-4o-mini",
        )

        inner = wrapper.agent
        assert inner.agent_name == "TestMCPAgent"
        assert inner.mcp_url == "http://localhost:3000/sse"
        assert inner.model_name == "gpt-4o-mini"

    def test_multi_session_swarm_creation(self):
        """A swarm of N agents should be created with role-based names."""
        from examples.stagehand.stagehand_mcp_agent import MultiSessionBrowserSwarm

        swarm = MultiSessionBrowserSwarm(
            mcp_server_url="http://localhost:3000/sse",
            num_agents=3,
        )

        assert len(swarm.agents) == 3
        # Names encode the role followed by the agent's index.
        expected = ["DataExtractor_0", "FormFiller_1", "WebMonitor_2"]
        for member, name in zip(swarm.agents, expected):
            assert member.agent_name == name

    @patch('swarms.Agent.run')
    def test_task_distribution(self, mock_run):
        """Every task should be dispatched to some agent and its result collected."""
        from examples.stagehand.stagehand_mcp_agent import MultiSessionBrowserSwarm

        mock_run.return_value = "Task completed"

        swarm = MultiSessionBrowserSwarm(num_agents=2)
        results = swarm.distribute_tasks(["Task 1", "Task 2", "Task 3"])

        # Three tasks across two agents: each task runs exactly once.
        assert len(results) == 3
        assert results == ["Task completed"] * 3
        assert mock_run.call_count == 3
||||||
|
# Test multi-agent workflows
|
||||||
|
class TestMultiAgentWorkflows:
    """Test multi-agent workflow configurations."""

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_price_comparison_workflow_creation(self):
        """Price-comparison workflow: concurrent scrapers feeding an analysis agent."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_price_comparison_workflow

        wf = create_price_comparison_workflow()

        # SequentialWorkflow with two stages.
        assert len(wf.agents) == 2
        # Stage 1 is itself a ConcurrentWorkflow (has a nested .agents).
        assert hasattr(wf.agents[0], 'agents')
        # Stage 2 is the analysis agent.
        assert wf.agents[1].agent_name == "PriceAnalysisAgent"

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_competitive_analysis_workflow_creation(self):
        """Competitive-analysis workflow: three agents in a rearrange pattern."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_competitive_analysis_workflow

        wf = create_competitive_analysis_workflow()

        assert len(wf.agents) == 3
        assert wf.flow == "company_researcher -> social_media_agent -> report_compiler"

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_automated_testing_workflow_creation(self):
        """Automated-testing workflow: concurrent testers followed by a second stage."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_automated_testing_workflow

        wf = create_automated_testing_workflow()

        # SequentialWorkflow with two stages.
        assert len(wf.agents) == 2
        # Stage 1 runs the UI, Form, and Accessibility testers concurrently.
        assert hasattr(wf.agents[0], 'agents')
        assert len(wf.agents[0].agents) == 3

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_news_aggregation_workflow_creation(self):
        """News-aggregation workflow: three sequential stages, scrapers first."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_news_aggregation_workflow

        wf = create_news_aggregation_workflow()

        # SequentialWorkflow with three stages.
        assert len(wf.agents) == 3
        # Stage 1 scrapes the three news sources concurrently.
        assert hasattr(wf.agents[0], 'agents')
        assert len(wf.agents[0].agents) == 3
|
# Integration tests
|
||||||
|
class TestIntegration:
    """End-to-end integration tests."""

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    async def test_full_browser_automation_flow(self):
        """Drive navigate → extract → observe through the wrapper agent."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        browser_agent = StagehandAgent(
            agent_name="IntegrationTestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Each natural-language command should surface the corresponding
        # marker in the agent's textual result.
        commands = [
            ("Navigate to example.com", "navigated_to"),
            ("Extract all text from the page", "extracted"),
            ("Find all buttons on the page", "observation"),
        ]
        for command, marker in commands:
            assert marker in browser_agent.run(command)

        # Release the mocked browser resources.
        browser_agent.cleanup()
|
# Allow running this test module directly (`python <file>`) — invokes pytest
# on this file in verbose mode instead of requiring a separate pytest call.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
Loading…
Reference in new issue