Merge pull request #1000 from filip-michalsky/add-stagehand

add stagehand example
pull/1011/head
Kye Gomez 4 weeks ago committed by GitHub
commit c0c9b7201a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,265 @@
"""
Stagehand Browser Automation Agent for Swarms
=============================================
This example demonstrates how to create a Swarms-compatible agent
that wraps Stagehand's browser automation capabilities.
The StagehandAgent class inherits from the Swarms Agent base class
and implements browser automation through natural language commands.
"""
import asyncio
import json
import os
from typing import Any, Dict, Optional
from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel, Field
from swarms import Agent as SwarmsAgent
from stagehand import Stagehand, StagehandConfig
load_dotenv()
class WebData(BaseModel):
    """Schema for extracted web data.

    Gives extraction tasks a predictable structure for page content.
    """

    # URL the data was extracted from.
    url: str = Field(..., description="The URL of the page")
    # Page title as rendered in the browser.
    title: str = Field(..., description="Page title")
    # Free-form extracted content (text, serialized JSON, etc.).
    content: str = Field(..., description="Extracted content")
    # Extra key/value details; defaults to an empty dict.
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )
class StagehandAgent(SwarmsAgent):
    """
    A Swarms agent that integrates Stagehand for browser automation.

    This agent can navigate websites, extract data, perform actions,
    and observe page elements using natural language instructions.
    """

    def __init__(
        self,
        agent_name: str = "StagehandBrowserAgent",
        browserbase_api_key: Optional[str] = None,
        browserbase_project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
        env: str = "LOCAL",  # LOCAL or BROWSERBASE
        *args,
        **kwargs,
    ):
        """
        Initialize the StagehandAgent.

        Args:
            agent_name: Name of the agent.
            browserbase_api_key: API key for Browserbase (if using cloud).
            browserbase_project_id: Project ID for Browserbase.
            model_name: LLM model to use.
            model_api_key: API key for the model.
            env: Environment - LOCAL or BROWSERBASE.
        """
        # Don't pass stagehand-specific args to parent.
        super().__init__(agent_name=agent_name, *args, **kwargs)
        # Credentials fall back to environment variables when not given.
        self.stagehand_config = StagehandConfig(
            env=env,
            api_key=browserbase_api_key
            or os.getenv("BROWSERBASE_API_KEY"),
            project_id=browserbase_project_id
            or os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name=model_name,
            model_api_key=model_api_key
            or os.getenv("OPENAI_API_KEY"),
        )
        self.stagehand = None
        self._initialized = False

    async def _init_stagehand(self):
        """Initialize the Stagehand instance (idempotent)."""
        if not self._initialized:
            self.stagehand = Stagehand(self.stagehand_config)
            await self.stagehand.init()
            self._initialized = True
            logger.info(
                f"Stagehand initialized for {self.agent_name}"
            )

    async def _close_stagehand(self):
        """Close the Stagehand instance if it is open."""
        if self.stagehand and self._initialized:
            await self.stagehand.close()
            self._initialized = False
            logger.info(f"Stagehand closed for {self.agent_name}")

    def run(self, task: str, *args, **kwargs) -> str:
        """
        Execute a browser automation task.

        The task string should contain instructions like:
        - "Navigate to example.com and extract the main content"
        - "Go to google.com and search for 'AI agents'"
        - "Extract all company names from https://ycombinator.com"

        Args:
            task: Natural language description of the browser task.

        Returns:
            String result of the task execution (JSON on success,
            an error message on failure).
        """
        return asyncio.run(self._async_run(task, *args, **kwargs))

    async def _async_run(self, task: str, *args, **kwargs) -> str:
        """Async implementation of run()."""
        try:
            await self._init_stagehand()
            result = await self._execute_browser_task(task)
            return json.dumps(result, indent=2)
        except Exception as e:
            logger.error(f"Error in browser task: {str(e)}")
            return f"Error executing browser task: {str(e)}"
        finally:
            # Deliberately keep the browser open for follow-up tasks;
            # callers release it via cleanup().
            pass

    async def _execute_browser_task(
        self, task: str
    ) -> Dict[str, Any]:
        """
        Execute a browser task based on natural language instructions.

        Interprets the task text and dispatches to the appropriate
        Stagehand page method (goto/extract/act/observe).
        """
        import re

        page = self.stagehand.page
        result = {"task": task, "status": "completed", "data": {}}
        task_lower = task.lower()

        # Navigation: prefer an explicit URL, then fall back to bare
        # domain names mentioned in the task.
        if any(
            keyword in task_lower
            for keyword in ["navigate", "go to", "visit", "open"]
        ):
            urls = re.findall(r"https?://[^\s]+", task)
            if not urls and any(
                domain in task for domain in [".com", ".org", ".net"]
            ):
                domains = re.findall(r"(\w+\.\w+)", task)
                if domains:
                    urls = [f"https://{domain}" for domain in domains]
            if urls:
                url = urls[0]
                await page.goto(url)
                result["data"]["navigated_to"] = url
                logger.info(f"Navigated to {url}")

        # Keyword-based action dispatch.
        if "extract" in task_lower:
            # Bug fix: strip the verb case-insensitively — the previous
            # str.replace only removed the lowercase form, so "Extract ..."
            # tasks kept the verb in the prompt.
            extraction_prompt = re.sub(
                "extract", "", task, flags=re.IGNORECASE
            ).strip()
            extracted = await page.extract(extraction_prompt)
            result["data"]["extracted"] = extracted
            result["action"] = "extract"
        elif "click" in task_lower or "press" in task_lower:
            action_result = await page.act(task)
            result["data"]["action_performed"] = str(action_result)
            result["action"] = "act"
        elif "search" in task_lower:
            # Bug fix: split case-insensitively — "Search for X" previously
            # failed to isolate the query because split() is case-sensitive.
            search_query = (
                re.split("search for", task, flags=re.IGNORECASE)[-1]
                .strip()
                .strip("'\"")
            )
            # Locate the search box before interacting with it.
            search_box = await page.observe(
                "find the search input field"
            )
            if search_box:
                await page.act(f"click on {search_box[0]}")
                await page.act(f"type '{search_query}'")
                await page.act("press Enter")
            result["data"]["search_query"] = search_query
            result["action"] = "search"
        elif "observe" in task_lower or "find" in task_lower:
            observation = await page.observe(task)
            result["data"]["observation"] = [
                {
                    "description": obs.description,
                    "selector": obs.selector,
                }
                for obs in observation
            ]
            result["action"] = "observe"
        else:
            # No recognized keyword: hand the whole task to act().
            action_result = await page.act(task)
            result["data"]["action_result"] = str(action_result)
            result["action"] = "general"
        return result

    def cleanup(self):
        """Clean up browser resources (safe to call multiple times)."""
        if self._initialized:
            try:
                asyncio.run(self._close_stagehand())
            except RuntimeError:
                # asyncio.run() raises if an event loop is already running
                # (e.g. cleanup called from async code); log and move on
                # rather than crash the caller.
                logger.warning(
                    "Could not close Stagehand synchronously; an event "
                    "loop is already running."
                )

    def __del__(self):
        """Best-effort close of the browser on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            # Never raise from __del__ — during interpreter shutdown the
            # modules cleanup() needs may already be torn down.
            pass
# Example usage
if __name__ == "__main__":
    # Create a Stagehand browser agent. LOCAL drives a Playwright browser
    # on this machine; BROWSERBASE would use the cloud service.
    browser_agent = StagehandAgent(
        agent_name="WebScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",  # Use LOCAL for Playwright, BROWSERBASE for cloud
    )

    # Example 1: Navigate and extract data
    print("Example 1: Basic navigation and extraction")
    result1 = browser_agent.run(
        "Navigate to https://news.ycombinator.com and extract the titles of the top 5 stories"
    )
    print(result1)
    print("\n" + "=" * 50 + "\n")

    # Example 2: Perform a search
    print("Example 2: Search on a website")
    result2 = browser_agent.run(
        "Go to google.com and search for 'Swarms AI framework'"
    )
    print(result2)
    print("\n" + "=" * 50 + "\n")

    # Example 3: Extract structured data
    print("Example 3: Extract specific information")
    result3 = browser_agent.run(
        "Navigate to https://example.com and extract the main heading and first paragraph"
    )
    print(result3)

    # Clean up — the agent keeps the browser open between run() calls.
    browser_agent.cleanup()

@ -0,0 +1,397 @@
"""
Stagehand Tools for Swarms Agent
=================================
This example demonstrates how to create Stagehand browser automation tools
that can be used by a standard Swarms Agent. Each Stagehand method (act,
extract, observe) becomes a separate tool that the agent can use.
This approach gives the agent more fine-grained control over browser
automation tasks.
"""
import asyncio
import json
import os
from typing import Optional
from dotenv import load_dotenv
from loguru import logger
from swarms import Agent
from stagehand import Stagehand, StagehandConfig
load_dotenv()
class BrowserState:
    """Singleton to manage browser state across tools.

    Every tool function in this module funnels through one shared
    instance, so all calls reuse a single Stagehand browser session.
    """

    _instance = None
    _stagehand = None
    _initialized = False

    def __new__(cls):
        # Classic singleton: the first construction wins, all later
        # calls return the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def init_browser(
        self,
        env: str = "LOCAL",
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
    ):
        """Initialize the browser if not already initialized."""
        if self._initialized:
            return
        # Credentials fall back to environment variables when absent.
        stagehand_config = StagehandConfig(
            env=env,
            api_key=api_key or os.getenv("BROWSERBASE_API_KEY"),
            project_id=project_id
            or os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name=model_name,
            model_api_key=model_api_key
            or os.getenv("OPENAI_API_KEY"),
        )
        self._stagehand = Stagehand(stagehand_config)
        await self._stagehand.init()
        self._initialized = True
        logger.info("Stagehand browser initialized")

    async def get_page(self):
        """Get the current page instance."""
        if not self._initialized:
            raise RuntimeError(
                "Browser not initialized. Call init_browser first."
            )
        return self._stagehand.page

    async def close(self):
        """Close the browser."""
        if self._initialized and self._stagehand:
            await self._stagehand.close()
            self._initialized = False
            logger.info("Stagehand browser closed")


# Browser state instance shared by every tool function below.
browser_state = BrowserState()
def navigate_browser(url: str) -> str:
    """
    Navigate to a URL in the browser.

    Args:
        url (str): The URL to open. If no protocol is given,
            "https://" is prepended automatically.

    Returns:
        str: A success message naming the URL that was opened, or an
            error message describing why navigation failed.

    Example:
        >>> navigate_browser("google.com")
        "Successfully navigated to https://google.com"
    """
    return asyncio.run(_navigate_browser_async(url))


async def _navigate_browser_async(url: str) -> str:
    """Async implementation of navigate_browser."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        # Default to HTTPS when the caller omitted the scheme.
        if not url.startswith(("http://", "https://")):
            url = f"https://{url}"
        await page.goto(url)
    except Exception as e:
        logger.error(f"Navigation error: {str(e)}")
        return f"Failed to navigate to {url}: {str(e)}"
    return f"Successfully navigated to {url}"
def browser_act(action: str) -> str:
    """
    Perform an action on the current web page using natural language.

    Args:
        action (str): Natural language description of the action, e.g.
            'click the submit button', 'type hello@example.com in the
            email field', 'scroll down', 'press Enter'.

    Returns:
        str: A message describing the action performed and its result,
            or an error message if the action could not be completed.
    """
    return asyncio.run(_browser_act_async(action))


async def _browser_act_async(action: str) -> str:
    """Async implementation of browser_act."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        outcome = await page.act(action)
        return f"Action performed: {action}. Result: {outcome}"
    except Exception as e:
        logger.error(f"Action error: {str(e)}")
        return f"Failed to perform action '{action}': {str(e)}"
def browser_extract(query: str) -> str:
    """
    Extract information from the current web page using natural language.

    Args:
        query (str): Natural language description of what to extract,
            e.g. 'extract all email addresses', 'get the main article
            text', 'find all product prices'.

    Returns:
        str: JSON-formatted extraction result (for dict/list results),
            the stringified result otherwise, or an error message.
    """
    return asyncio.run(_browser_extract_async(query))


async def _browser_extract_async(query: str) -> str:
    """Async implementation of browser_extract."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        data = await page.extract(query)
        # Containers are serialized to JSON for agent consumption;
        # anything else falls back to str().
        return (
            json.dumps(data, indent=2)
            if isinstance(data, (dict, list))
            else str(data)
        )
    except Exception as e:
        logger.error(f"Extraction error: {str(e)}")
        return f"Failed to extract '{query}': {str(e)}"
def browser_observe(query: str) -> str:
    """
    Observe and find elements on the current web page using natural language.

    Args:
        query (str): Natural language description of the elements to
            locate, e.g. 'find the search box', 'locate the submit
            button', 'observe form elements'.

    Returns:
        str: JSON list of matches with their description, selector,
            and suggested interaction method, or an error message.
    """
    return asyncio.run(_browser_observe_async(query))


async def _browser_observe_async(query: str) -> str:
    """Async implementation of browser_observe."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        observations = await page.observe(query)
        # Flatten observation objects into plain dicts so the result
        # serializes cleanly to JSON.
        summaries = [
            {
                "description": obs.description,
                "selector": obs.selector,
                "method": obs.method,
            }
            for obs in observations
        ]
        return json.dumps(summaries, indent=2)
    except Exception as e:
        logger.error(f"Observation error: {str(e)}")
        return f"Failed to observe '{query}': {str(e)}"
def browser_screenshot(filename: str = "screenshot.png") -> str:
    """
    Take a screenshot of the current web page.

    Args:
        filename (str, optional): File to save the screenshot to.
            Defaults to "screenshot.png". A ".png" extension is added
            automatically if missing.

    Returns:
        str: Success message naming the saved file, or an error
            message if the screenshot fails.

    Example:
        >>> browser_screenshot("page_capture")
        "Screenshot saved to page_capture.png"
    """
    return asyncio.run(_browser_screenshot_async(filename))


async def _browser_screenshot_async(filename: str) -> str:
    """Async implementation of browser_screenshot."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()
        # Ensure .png extension
        if not filename.endswith(".png"):
            filename += ".png"
        # Stagehand wraps Playwright; screenshots go through the
        # underlying Playwright page object.
        playwright_page = page.page
        await playwright_page.screenshot(path=filename)
        # Bug fix: the success message previously returned the literal
        # text "(unknown)" instead of interpolating the filename.
        return f"Screenshot saved to {filename}"
    except Exception as e:
        logger.error(f"Screenshot error: {str(e)}")
        return f"Failed to take screenshot: {str(e)}"
def close_browser() -> str:
    """
    Close the browser when done with automation tasks.

    Returns:
        str: "Browser closed successfully" on success, otherwise an
            error message describing the failure.
    """
    return asyncio.run(_close_browser_async())


async def _close_browser_async() -> str:
    """Async implementation of close_browser."""
    try:
        await browser_state.close()
    except Exception as e:
        logger.error(f"Close browser error: {str(e)}")
        return f"Failed to close browser: {str(e)}"
    return "Browser closed successfully"
# Example usage
if __name__ == "__main__":
    # Create a Swarms agent wired up with the browser tools above.
    # The agent picks which tool to call based on the task text.
    browser_agent = Agent(
        agent_name="BrowserAutomationAgent",
        model_name="gpt-4o-mini",
        max_loops=1,
        tools=[
            navigate_browser,
            browser_act,
            browser_extract,
            browser_observe,
            browser_screenshot,
            close_browser,
        ],
        system_prompt="""You are a web browser automation specialist. You can:
1. Navigate to websites using the navigate_browser tool
2. Perform actions like clicking and typing using the browser_act tool
3. Extract information from pages using the browser_extract tool
4. Find and observe elements using the browser_observe tool
5. Take screenshots using the browser_screenshot tool
6. Close the browser when done using the close_browser tool
Always start by navigating to a URL before trying to interact with a page.
Be specific in your actions and extractions. When done with tasks, close the browser.""",
    )

    # Example 1: Research task
    print("Example 1: Automated web research")
    result1 = browser_agent.run(
        "Go to hackernews (news.ycombinator.com) and extract the titles of the top 5 stories. Then take a screenshot."
    )
    print(result1)
    print("\n" + "=" * 50 + "\n")

    # Example 2: Search task
    print("Example 2: Perform a web search")
    result2 = browser_agent.run(
        "Navigate to google.com, search for 'Python web scraping best practices', and extract the first 3 search result titles"
    )
    print(result2)
    print("\n" + "=" * 50 + "\n")

    # Example 3: Form interaction
    print("Example 3: Interact with a form")
    result3 = browser_agent.run(
        "Go to example.com and observe what elements are on the page. Then extract all the text content."
    )
    print(result3)

    # Clean up — asks the agent to invoke the close_browser tool.
    browser_agent.run("Close the browser")

@ -0,0 +1,263 @@
"""
Stagehand MCP Server Integration with Swarms
============================================
This example demonstrates how to use the Stagehand MCP (Model Context Protocol)
server with Swarms agents. The MCP server provides browser automation capabilities
as standardized tools that can be discovered and used by agents.
Prerequisites:
1. Install and run the Stagehand MCP server:
cd stagehand-mcp-server
npm install
npm run build
npm start
2. The server will start on http://localhost:3000/sse
Features:
- Automatic tool discovery from MCP server
- Multi-session browser management
- Built-in screenshot resources
- Prompt templates for common tasks
"""
from typing import List
from dotenv import load_dotenv
from loguru import logger
from swarms import Agent
load_dotenv()
class StagehandMCPAgent:
    """
    A Swarms agent that connects to the Stagehand MCP server
    for browser automation capabilities.

    Tools are not declared locally: the underlying Agent discovers
    them from the MCP server via the mcp_url connection.
    """

    def __init__(
        self,
        agent_name: str = "StagehandMCPAgent",
        mcp_server_url: str = "http://localhost:3000/sse",
        model_name: str = "gpt-4o-mini",
        max_loops: int = 1,
    ):
        """
        Initialize the Stagehand MCP Agent.

        Args:
            agent_name: Name of the agent
            mcp_server_url: URL of the Stagehand MCP server (SSE endpoint)
            model_name: LLM model to use
            max_loops: Maximum number of reasoning loops
        """
        self.agent = Agent(
            agent_name=agent_name,
            model_name=model_name,
            max_loops=max_loops,
            # Connect to the Stagehand MCP server
            mcp_url=mcp_server_url,
            system_prompt="""You are a web browser automation specialist with access to Stagehand MCP tools.
Available tools from the MCP server:
- navigate: Navigate to a URL
- act: Perform actions on web pages (click, type, etc.)
- extract: Extract data from web pages
- observe: Find and observe elements on pages
- screenshot: Take screenshots
- createSession: Create new browser sessions for parallel tasks
- listSessions: List active browser sessions
- closeSession: Close browser sessions
For multi-page workflows, you can create multiple sessions.
Always be specific in your actions and extractions.
Remember to close sessions when done with them.""",
            verbose=True,
        )

    def run(self, task: str) -> str:
        """Run a browser automation task and return the agent's reply."""
        return self.agent.run(task)
class MultiSessionBrowserSwarm:
    """
    A multi-agent swarm that uses multiple browser sessions
    for parallel web automation tasks.

    Each agent gets a specialized role and is instructed to create
    its own MCP browser session so agents do not interfere.
    """

    def __init__(
        self,
        mcp_server_url: str = "http://localhost:3000/sse",
        num_agents: int = 3,
    ):
        """
        Initialize a swarm of browser automation agents.

        Args:
            mcp_server_url: URL of the Stagehand MCP server
            num_agents: Number of agents to create (capped at the
                number of predefined roles, currently 3)
        """
        self.agents = []
        # Create specialized agents for different tasks
        agent_roles = [
            (
                "DataExtractor",
                "You specialize in extracting structured data from websites.",
            ),
            (
                "FormFiller",
                "You specialize in filling out forms and interacting with web applications.",
            ),
            (
                "WebMonitor",
                "You specialize in monitoring websites for changes and capturing screenshots.",
            ),
        ]
        # min() caps the agent count at the available role definitions.
        for i in range(min(num_agents, len(agent_roles))):
            name, specialization = agent_roles[i]
            agent = Agent(
                agent_name=f"{name}_{i}",
                model_name="gpt-4o-mini",
                max_loops=1,
                mcp_url=mcp_server_url,
                system_prompt=f"""You are a web browser automation specialist. {specialization}
You have access to Stagehand MCP tools including:
- createSession: Create a new browser session
- navigate_session: Navigate to URLs in a specific session
- act_session: Perform actions in a specific session
- extract_session: Extract data from a specific session
- observe_session: Observe elements in a specific session
- closeSession: Close a session when done
Always create your own session for tasks to work independently from other agents.""",
                verbose=True,
            )
            self.agents.append(agent)

    def distribute_tasks(self, tasks: List[str]) -> List[str]:
        """Distribute tasks round-robin among agents and collect results.

        Note: tasks run sequentially in this loop — parallelism comes
        from each agent using its own browser session, not threads.
        """
        results = []
        # Distribute tasks round-robin among agents
        for i, task in enumerate(tasks):
            agent_idx = i % len(self.agents)
            agent = self.agents[agent_idx]
            logger.info(
                f"Assigning task to {agent.agent_name}: {task}"
            )
            result = agent.run(task)
            results.append(result)
        return results
# Example usage
if __name__ == "__main__":
    print("=" * 70)
    print("Stagehand MCP Server Integration Examples")
    print("=" * 70)
    # All examples require the MCP server to be running locally.
    print(
        "\nMake sure the Stagehand MCP server is running on http://localhost:3000/sse"
    )
    print("Run: cd stagehand-mcp-server && npm start\n")

    # Example 1: Single agent with MCP tools
    print("\nExample 1: Single Agent with MCP Tools")
    print("-" * 40)
    mcp_agent = StagehandMCPAgent(
        agent_name="WebResearchAgent",
        mcp_server_url="http://localhost:3000/sse",
    )
    # Research task using MCP tools
    result1 = mcp_agent.run(
        """Navigate to news.ycombinator.com and extract the following:
1. The titles of the top 5 stories
2. Their points/scores
3. Number of comments for each
Then take a screenshot of the page."""
    )
    print(f"Result: {result1}")
    print("\n" + "=" * 70 + "\n")

    # Example 2: Multi-session parallel browsing
    print("Example 2: Multi-Session Parallel Browsing")
    print("-" * 40)
    parallel_agent = StagehandMCPAgent(
        agent_name="ParallelBrowserAgent",
        mcp_server_url="http://localhost:3000/sse",
    )
    result2 = parallel_agent.run(
        """Create 3 browser sessions and perform these tasks in parallel:
1. Session 1: Go to github.com/trending and extract the top 3 trending repositories
2. Session 2: Go to reddit.com/r/programming and extract the top 3 posts
3. Session 3: Go to stackoverflow.com and extract the featured questions
After extracting data from all sessions, close them."""
    )
    print(f"Result: {result2}")
    print("\n" + "=" * 70 + "\n")

    # Example 3: Multi-agent browser swarm
    print("Example 3: Multi-Agent Browser Swarm")
    print("-" * 40)
    # Create a swarm of specialized browser agents
    browser_swarm = MultiSessionBrowserSwarm(
        mcp_server_url="http://localhost:3000/sse",
        num_agents=3,
    )
    # Define tasks for the swarm — one per specialized agent.
    swarm_tasks = [
        "Create a session, navigate to python.org, and extract information about the latest Python version and its key features",
        "Create a session, go to npmjs.com, search for 'stagehand', and extract information about the package including version and description",
        "Create a session, visit playwright.dev, and extract the main features and benefits listed on the homepage",
    ]
    print("Distributing tasks to browser swarm...")
    swarm_results = browser_swarm.distribute_tasks(swarm_tasks)
    for i, result in enumerate(swarm_results):
        print(f"\nTask {i+1} Result: {result}")
    print("\n" + "=" * 70 + "\n")

    # Example 4: Complex workflow with session management
    print("Example 4: Complex Multi-Page Workflow")
    print("-" * 40)
    workflow_agent = StagehandMCPAgent(
        agent_name="WorkflowAgent",
        mcp_server_url="http://localhost:3000/sse",
        max_loops=2,  # Allow more complex reasoning
    )
    result4 = workflow_agent.run(
        """Perform a comprehensive analysis of AI frameworks:
1. Create a new session
2. Navigate to github.com/huggingface/transformers and extract the star count and latest release info
3. In the same session, navigate to github.com/openai/gpt-3 and extract similar information
4. Navigate to github.com/anthropics/anthropic-sdk-python and extract repository statistics
5. Take screenshots of each repository page
6. Compile a comparison report of all three repositories
7. Close the session when done"""
    )
    print(f"Result: {result4}")
    print("\n" + "=" * 70)
    print("All examples completed!")
    print("=" * 70)

@ -0,0 +1,371 @@
"""
Stagehand Multi-Agent Browser Automation Workflows
=================================================
This example demonstrates advanced multi-agent workflows using Stagehand
for complex browser automation scenarios. It shows how multiple agents
can work together to accomplish sophisticated web tasks.
Use cases:
1. E-commerce price monitoring across multiple sites
2. Competitive analysis and market research
3. Automated testing and validation workflows
4. Data aggregation from multiple sources
"""
from datetime import datetime
from typing import Dict, List, Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow
from swarms.structs.agent_rearrange import AgentRearrange
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
load_dotenv()
# Pydantic models for structured data
class ProductInfo(BaseModel):
    """Product information schema."""

    # Product display name.
    name: str = Field(..., description="Product name")
    # Listed price. NOTE(review): currency/units are not normalized
    # anywhere visible here — confirm with producers.
    price: float = Field(..., description="Product price")
    # Free-text availability (e.g. "in stock") — not an enum.
    availability: str = Field(..., description="Availability status")
    # Direct link to the product page.
    url: str = Field(..., description="Product URL")
    # Optional path to a saved screenshot of the listing.
    screenshot_path: Optional[str] = Field(
        None, description="Screenshot file path"
    )
class MarketAnalysis(BaseModel):
    """Market analysis report schema."""

    # When the analysis was produced. NOTE(review): datetime.now gives
    # naive local time — confirm whether UTC is expected downstream.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Products included in this analysis run.
    products: List[ProductInfo] = Field(
        ..., description="List of products analyzed"
    )
    # Presumably keyed "min"/"max" per the description — verify
    # against whatever populates this model.
    price_range: Dict[str, float] = Field(
        ..., description="Min and max prices"
    )
    # Human-readable takeaways from the analysis.
    recommendations: List[str] = Field(
        ..., description="Analysis recommendations"
    )
# Specialized browser agents
class ProductScraperAgent(StagehandAgent):
    """Specialized agent for scraping product information."""

    def __init__(self, site_name: str, *args, **kwargs):
        # Embed the target site in the agent name for easier tracing;
        # all other configuration passes through to StagehandAgent.
        super().__init__(
            agent_name=f"ProductScraper_{site_name}", *args, **kwargs
        )
        self.site_name = site_name
class PriceMonitorAgent(StagehandAgent):
    """Specialized agent for monitoring price changes."""

    def __init__(self, *args, **kwargs):
        # Fixed agent name; everything else passes through to the
        # StagehandAgent base class.
        super().__init__(
            agent_name="PriceMonitorAgent", *args, **kwargs
        )
# Example 1: E-commerce Price Comparison Workflow
def create_price_comparison_workflow():
    """
    Create a workflow that compares prices across multiple e-commerce sites.

    Returns:
        SequentialWorkflow: concurrent scraping (Amazon + eBay)
        followed by a price-analysis step.
    """
    # Create specialized agents for different sites
    amazon_agent = StagehandAgent(
        agent_name="AmazonScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    ebay_agent = StagehandAgent(
        agent_name="EbayScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    analysis_agent = Agent(
        agent_name="PriceAnalysisAgent",
        model_name="gpt-4o-mini",
        system_prompt="""You are a price analysis expert. Analyze product prices from multiple sources
and provide insights on the best deals, price trends, and recommendations.
Focus on value for money and highlight any significant price differences.""",
    )
    # Create concurrent workflow for parallel scraping
    scraping_workflow = ConcurrentWorkflow(
        agents=[amazon_agent, ebay_agent],
        max_loops=1,
        verbose=True,
    )
    # Create sequential workflow: scrape -> analyze.
    # NOTE(review): this nests a ConcurrentWorkflow inside a
    # SequentialWorkflow's agents list — confirm the installed swarms
    # version accepts workflow objects in place of agents.
    full_workflow = SequentialWorkflow(
        agents=[scraping_workflow, analysis_agent],
        max_loops=1,
        verbose=True,
    )
    return full_workflow
# Example 2: Competitive Analysis Workflow
def create_competitive_analysis_workflow():
    """
    Create a workflow for competitive analysis across multiple company websites.

    Returns:
        AgentRearrange: research -> social media -> report pipeline.
    """
    # Agent for extracting company information
    company_researcher = StagehandAgent(
        agent_name="CompanyResearchAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for analyzing social media presence
    social_media_agent = StagehandAgent(
        agent_name="SocialMediaAnalysisAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for compiling competitive analysis report
    report_compiler = Agent(
        agent_name="CompetitiveAnalysisReporter",
        model_name="gpt-4o-mini",
        system_prompt="""You are a competitive analysis expert. Compile comprehensive reports
based on company information and social media presence data. Identify strengths,
weaknesses, and market positioning for each company.""",
    )
    # Bug fix: AgentRearrange resolves flow steps by each agent's
    # agent_name, so the flow string must use those names — the
    # previous pattern used the Python variable names
    # ("company_researcher -> ..."), which match no agent.
    workflow_pattern = (
        "CompanyResearchAgent -> SocialMediaAnalysisAgent"
        " -> CompetitiveAnalysisReporter"
    )
    competitive_workflow = AgentRearrange(
        agents=[
            company_researcher,
            social_media_agent,
            report_compiler,
        ],
        flow=workflow_pattern,
        verbose=True,
    )
    return competitive_workflow
# Example 3: Automated Testing Workflow
def create_automated_testing_workflow():
    """
    Create a workflow for automated web application testing.

    Returns:
        SequentialWorkflow: concurrent UI/form/accessibility testing
        followed by a report-compilation step.
    """
    # Agent for UI testing
    ui_tester = StagehandAgent(
        agent_name="UITestingAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for form validation testing
    form_tester = StagehandAgent(
        agent_name="FormValidationAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for accessibility testing
    accessibility_tester = StagehandAgent(
        agent_name="AccessibilityTestingAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )
    # Agent for compiling test results
    test_reporter = Agent(
        agent_name="TestReportCompiler",
        model_name="gpt-4o-mini",
        system_prompt="""You are a QA test report specialist. Compile test results from
UI, form validation, and accessibility testing into a comprehensive report.
Highlight any failures, warnings, and provide recommendations for fixes.""",
    )
    # Concurrent testing followed by report generation
    testing_workflow = ConcurrentWorkflow(
        agents=[ui_tester, form_tester, accessibility_tester],
        max_loops=1,
        verbose=True,
    )
    full_test_workflow = SequentialWorkflow(
        agents=[testing_workflow, test_reporter],
        max_loops=1,
        verbose=True,
    )
    return full_test_workflow
# Example 4: News Aggregation and Sentiment Analysis
def create_news_aggregation_workflow():
    """
    Create a workflow for news aggregation and sentiment analysis.

    Returns:
        SequentialWorkflow: parallel scraping of the news sites, then
        sentiment analysis, then trend identification.
    """
    # Multiple news scraper agents, one per site.
    news_scrapers = []
    news_sites = [
        ("TechCrunch", "https://techcrunch.com"),
        ("HackerNews", "https://news.ycombinator.com"),
        ("Reddit", "https://reddit.com/r/technology"),
    ]
    # NOTE: the URLs in news_sites are documentation only — the scraper
    # agents are not configured with them here, so the task given to the
    # workflow must name the sites/URLs to visit. (The unused unpacked
    # variable is marked with a leading underscore accordingly.)
    for site_name, _url in news_sites:
        scraper = StagehandAgent(
            agent_name=f"{site_name}Scraper",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        news_scrapers.append(scraper)
    # Sentiment analysis agent
    sentiment_analyzer = Agent(
        agent_name="SentimentAnalyzer",
        model_name="gpt-4o-mini",
        system_prompt="""You are a sentiment analysis expert. Analyze news articles and posts
to determine overall sentiment (positive, negative, neutral) and identify key themes
and trends in the technology sector.""",
    )
    # Trend identification agent
    trend_identifier = Agent(
        agent_name="TrendIdentifier",
        model_name="gpt-4o-mini",
        system_prompt="""You are a trend analysis expert. Based on aggregated news and sentiment
data, identify emerging trends, hot topics, and potential market movements in the
technology sector.""",
    )
    # Workflow: parallel scraping -> sentiment analysis -> trend identification
    scraping_workflow = ConcurrentWorkflow(
        agents=news_scrapers,
        max_loops=1,
        verbose=True,
    )
    analysis_workflow = SequentialWorkflow(
        agents=[
            scraping_workflow,
            sentiment_analyzer,
            trend_identifier,
        ],
        max_loops=1,
        verbose=True,
    )
    return analysis_workflow
# Main execution examples
if __name__ == "__main__":

    def _cleanup_workflow_agents(workflow) -> None:
        """Recursively close browser instances held by StagehandAgents.

        Workflows can nest (e.g. a ConcurrentWorkflow inside a
        SequentialWorkflow), so descend into anything exposing ``.agents``.
        """
        for agent in getattr(workflow, "agents", []):
            if isinstance(agent, StagehandAgent):
                agent.cleanup()
            elif hasattr(agent, "agents"):  # nested workflow
                _cleanup_workflow_agents(agent)

    print("=" * 70)
    print("Stagehand Multi-Agent Workflow Examples")
    print("=" * 70)

    # BUGFIX: previously only price_workflow's agents were cleaned up, which
    # leaked the browser instances of the other three workflows, and no
    # cleanup ran at all if an example raised. Every workflow is registered
    # here and released in the finally-block below.
    created_workflows = []

    try:
        # Example 1: Price Comparison
        print("\nExample 1: E-commerce Price Comparison")
        print("-" * 40)
        price_workflow = create_price_comparison_workflow()
        created_workflows.append(price_workflow)

        # Search for a specific product across multiple sites
        price_result = price_workflow.run(
            """Search for 'iPhone 15 Pro Max 256GB' on:
    1. Amazon - extract price, availability, and seller information
    2. eBay - extract price range, number of listings, and average price
    Take screenshots of search results from both sites.
    Compare the prices and provide recommendations on where to buy."""
        )
        print(f"Price Comparison Result:\n{price_result}")
        print("\n" + "=" * 70 + "\n")

        # Example 2: Competitive Analysis
        print("Example 2: Competitive Analysis")
        print("-" * 40)
        competitive_workflow = create_competitive_analysis_workflow()
        created_workflows.append(competitive_workflow)

        competitive_result = competitive_workflow.run(
            """Analyze these three AI companies:
    1. OpenAI - visit openai.com and extract mission, products, and recent announcements
    2. Anthropic - visit anthropic.com and extract their AI safety approach and products
    3. DeepMind - visit deepmind.com and extract research focus and achievements
    Then check their Twitter/X presence and recent posts.
    Compile a competitive analysis report comparing their market positioning."""
        )
        print(f"Competitive Analysis Result:\n{competitive_result}")
        print("\n" + "=" * 70 + "\n")

        # Example 3: Automated Testing
        print("Example 3: Automated Web Testing")
        print("-" * 40)
        testing_workflow = create_automated_testing_workflow()
        created_workflows.append(testing_workflow)

        test_result = testing_workflow.run(
            """Test the website example.com:
    1. UI Testing: Check if all main navigation links work, images load, and layout is responsive
    2. Form Testing: If there are any forms, test with valid and invalid inputs
    3. Accessibility: Check for alt texts, ARIA labels, and keyboard navigation
    Take screenshots of any issues found and compile a comprehensive test report."""
        )
        print(f"Test Results:\n{test_result}")
        print("\n" + "=" * 70 + "\n")

        # Example 4: News Aggregation
        print("Example 4: Tech News Aggregation and Analysis")
        print("-" * 40)
        news_workflow = create_news_aggregation_workflow()
        created_workflows.append(news_workflow)

        news_result = news_workflow.run(
            """For each news source:
    1. TechCrunch: Extract the top 5 headlines about AI or machine learning
    2. HackerNews: Extract the top 5 posts related to AI/ML with most points
    3. Reddit r/technology: Extract top 5 posts about AI from the past week
    Analyze sentiment and identify emerging trends in AI technology."""
        )
        print(f"News Analysis Result:\n{news_result}")
    finally:
        # Cleanup all browser instances from every workflow created above,
        # even if one of the examples raised.
        print("\n" + "=" * 70)
        print("Cleaning up browser instances...")
        for workflow in created_workflows:
            _cleanup_workflow_agents(workflow)
        print("All workflows completed!")
        print("=" * 70)

@ -0,0 +1,249 @@
# Stagehand Browser Automation Integration for Swarms
This directory contains examples demonstrating how to integrate [Stagehand](https://github.com/browserbase/stagehand), an AI-powered browser automation framework, with the Swarms multi-agent framework.
## Overview
Stagehand provides natural language browser automation capabilities that can be seamlessly integrated into Swarms agents. This integration enables:
- 🌐 **Natural Language Web Automation**: Use simple commands like "click the submit button" or "extract product prices"
- 🤖 **Multi-Agent Browser Workflows**: Multiple agents can automate different websites simultaneously
- 🔧 **Flexible Integration Options**: Use as a wrapped agent, individual tools, or via MCP server
- 📊 **Complex Automation Scenarios**: E-commerce monitoring, competitive analysis, automated testing, and more
## Examples
### 1. Stagehand Wrapper Agent (`1_stagehand_wrapper_agent.py`)
The simplest integration - wraps Stagehand as a Swarms-compatible agent.
```python
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
# Create a browser automation agent
browser_agent = StagehandAgent(
agent_name="WebScraperAgent",
model_name="gpt-4o-mini",
env="LOCAL", # or "BROWSERBASE" for cloud execution
)
# Use natural language to control the browser
result = browser_agent.run(
"Navigate to news.ycombinator.com and extract the top 5 story titles"
)
```
**Features:**
- Inherits from Swarms `Agent` base class
- Automatic browser lifecycle management
- Natural language task interpretation
- Support for both local (Playwright) and cloud (Browserbase) execution
### 2. Stagehand as Tools (`2_stagehand_tools_agent.py`)
Provides fine-grained control by exposing Stagehand methods as individual tools.
```python
from swarms import Agent
from examples.stagehand.stagehand_tools_agent import (
NavigateTool, ActTool, ExtractTool, ObserveTool, ScreenshotTool
)
# Create agent with browser tools
browser_agent = Agent(
agent_name="BrowserAutomationAgent",
model_name="gpt-4o-mini",
tools=[
NavigateTool(),
ActTool(),
ExtractTool(),
ObserveTool(),
ScreenshotTool(),
],
)
# Agent can now use tools strategically
result = browser_agent.run(
"Go to google.com, search for 'Python tutorials', and extract the first 3 results"
)
```
**Available Tools:**
- `NavigateTool`: Navigate to URLs
- `ActTool`: Perform actions (click, type, scroll)
- `ExtractTool`: Extract data from pages
- `ObserveTool`: Find elements on pages
- `ScreenshotTool`: Capture screenshots
- `CloseBrowserTool`: Clean up browser resources
### 3. Stagehand MCP Server (`3_stagehand_mcp_agent.py`)
Integrates with Stagehand's Model Context Protocol (MCP) server for standardized tool access.
```python
from examples.stagehand.stagehand_mcp_agent import StagehandMCPAgent
# Connect to Stagehand MCP server
mcp_agent = StagehandMCPAgent(
agent_name="WebResearchAgent",
mcp_server_url="http://localhost:3000/sse",
)
# Use MCP tools including multi-session management
result = mcp_agent.run("""
Create 3 browser sessions and:
1. Session 1: Check Python.org for latest version
2. Session 2: Check PyPI for trending packages
3. Session 3: Check GitHub Python trending repos
Compile a Python ecosystem status report.
""")
```
**MCP Features:**
- Automatic tool discovery
- Multi-session browser management
- Built-in screenshot resources
- Prompt templates for common tasks
### 4. Multi-Agent Workflows (`4_stagehand_multi_agent_workflow.py`)
Demonstrates complex multi-agent browser automation scenarios.
```python
from examples.stagehand.stagehand_multi_agent_workflow import (
create_price_comparison_workflow,
create_competitive_analysis_workflow,
create_automated_testing_workflow,
create_news_aggregation_workflow
)
# Price comparison across multiple e-commerce sites
price_workflow = create_price_comparison_workflow()
result = price_workflow.run(
"Compare prices for iPhone 15 Pro on Amazon and eBay"
)
# Competitive analysis of multiple companies
competitive_workflow = create_competitive_analysis_workflow()
result = competitive_workflow.run(
"Analyze OpenAI, Anthropic, and DeepMind websites and social media"
)
```
**Workflow Examples:**
- **E-commerce Monitoring**: Track prices across multiple sites
- **Competitive Analysis**: Research competitors' websites and social media
- **Automated Testing**: UI, form validation, and accessibility testing
- **News Aggregation**: Collect and analyze news from multiple sources
## Setup
### Prerequisites
1. **Install Swarms and Stagehand:**
```bash
pip install swarms stagehand
```
2. **Set up environment variables:**
```bash
# For local browser automation (using Playwright)
export OPENAI_API_KEY="your-openai-key"
# For cloud browser automation (using Browserbase)
export BROWSERBASE_API_KEY="your-browserbase-key"
export BROWSERBASE_PROJECT_ID="your-project-id"
```
3. **For MCP Server examples:**
```bash
# Install and run the Stagehand MCP server
cd stagehand-mcp-server
npm install
npm run build
npm start
```
## Use Cases
### E-commerce Automation
- Price monitoring and comparison
- Inventory tracking
- Automated purchasing workflows
- Review aggregation
### Research and Analysis
- Competitive intelligence gathering
- Market research automation
- Social media monitoring
- News and trend analysis
### Quality Assurance
- Automated UI testing
- Cross-browser compatibility testing
- Form validation testing
- Accessibility compliance checking
### Data Collection
- Web scraping at scale
- Real-time data monitoring
- Structured data extraction
- Screenshot documentation
## Best Practices
1. **Resource Management**: Always clean up browser instances when done
```python
browser_agent.cleanup() # For wrapper agents
```
2. **Error Handling**: Stagehand includes self-healing capabilities, but wrap critical operations in try-except blocks
3. **Parallel Execution**: Use `ConcurrentWorkflow` for simultaneous browser automation across multiple sites
4. **Session Management**: For complex multi-page workflows, use the MCP server's session management capabilities
5. **Rate Limiting**: Be respectful of websites - add delays between requests when necessary
## Testing
Run the test suite to verify the integration:
```bash
pytest tests/stagehand/test_stagehand_integration.py -v
```
## Troubleshooting
### Common Issues
1. **Browser not starting**: Ensure Playwright is properly installed
```bash
playwright install
```
2. **MCP connection failed**: Verify the MCP server is running on the correct port
3. **Timeout errors**: Increase timeout in StagehandConfig or agent initialization
### Debug Mode
Enable verbose logging:
```python
agent = StagehandAgent(
agent_name="DebugAgent",
verbose=True, # Enable detailed logging
)
```
## Contributing
We welcome contributions! Please:
1. Follow the existing code style
2. Add tests for new features
3. Update documentation
4. Submit PRs with clear descriptions
## License
These examples are provided under the same license as the Swarms framework. Stagehand is licensed separately - see [Stagehand's repository](https://github.com/browserbase/stagehand) for details.

@ -0,0 +1,13 @@
# Requirements for Stagehand integration examples
swarms>=8.0.0
stagehand>=0.1.0
python-dotenv>=1.0.0
pydantic>=2.0.0
loguru>=0.7.0
# For MCP server examples (optional)
httpx>=0.24.0
# For testing
pytest>=7.0.0
pytest-asyncio>=0.21.0

@ -0,0 +1,436 @@
"""
Tests for Stagehand Integration with Swarms
==========================================
This module contains tests for the Stagehand browser automation
integration with the Swarms framework.
"""
import json
import pytest
from unittest.mock import AsyncMock, patch
# Mock Stagehand classes
class MockObserveResult:
    """Stand-in for one Stagehand ``observe()`` result.

    Exposes the three attributes the tests read: ``description``,
    ``selector``, and ``method`` (defaulting to "click").
    """

    def __init__(self, description, selector, method="click"):
        self.description, self.selector, self.method = (
            description,
            selector,
            method,
        )
class MockStagehandPage:
    """Async stub mimicking the subset of the Stagehand page API the tests use."""

    async def goto(self, url):
        # Navigation is a no-op in the mock.
        return None

    async def act(self, action):
        # Echo the requested action so callers can assert on it.
        return f"Performed action: {action}"

    async def extract(self, query):
        # Fixed payload that records the query it was given.
        payload = {"extracted": query, "data": ["item1", "item2"]}
        return payload

    async def observe(self, query):
        # Two canned matches, regardless of the query.
        matches = [
            MockObserveResult("Search box", "#search-input"),
            MockObserveResult("Submit button", "#submit-btn"),
        ]
        return matches
class MockStagehand:
    """Async stub for the Stagehand client.

    Records the config it was built with and exposes a mock page; ``init``
    and ``close`` are no-ops.
    """

    def __init__(self, config):
        # Keep the config for later assertions and attach a fake page.
        self.config = config
        self.page = MockStagehandPage()

    async def init(self):
        # Startup is a no-op for the mock.
        pass

    async def close(self):
        # Shutdown is a no-op for the mock.
        pass
# Test StagehandAgent wrapper
class TestStagehandAgent:
    """Test the StagehandAgent wrapper class.

    Every test patches ``Stagehand`` with :class:`MockStagehand` so that no
    real browser is launched; imports of the wrapper happen inside each test
    so patching is already in effect when the class is used.
    """

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_agent_initialization(self):
        """Test that StagehandAgent initializes correctly."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Constructor arguments must be forwarded into the stored config,
        # and the browser must stay lazy (not initialized until first run).
        assert agent.agent_name == "TestAgent"
        assert agent.stagehand_config.env == "LOCAL"
        assert agent.stagehand_config.model_name == "gpt-4o-mini"
        assert not agent._initialized

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_navigation_task(self):
        """Test navigation and extraction task."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run(
            "Navigate to example.com and extract the main content"
        )

        # Parse result — run() is expected to return a JSON string whose
        # "data" records the visited URL and any extracted content.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert "navigated_to" in result_data["data"]
        assert (
            result_data["data"]["navigated_to"]
            == "https://example.com"
        )
        assert "extracted" in result_data["data"]

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_search_task(self):
        """Test search functionality."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run(
            "Go to google.com and search for 'test query'"
        )

        # The wrapper should recognize a search task and report the query.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert result_data["data"]["search_query"] == "test query"
        assert result_data["action"] == "search"

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_cleanup(self):
        """Test that cleanup properly closes browser."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Initialize the agent by running a first task.
        agent.run("Navigate to example.com")
        assert agent._initialized

        # Cleanup
        agent.cleanup()

        # After cleanup, the agent should re-initialize and run again.
        result = agent.run("Navigate to example.com")
        assert result is not None
# Test Stagehand Tools
class TestStagehandTools:
    """Test individual Stagehand tools.

    BUGFIX: every test here is a coroutine, so each one is now marked with
    ``@pytest.mark.asyncio`` — without the marker, pytest-asyncio (in its
    default strict mode) does not execute ``async def`` tests, so they were
    silently not awaited. This also makes the class consistent with
    ``TestIntegration``, whose async test already carries the marker.
    """

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_navigate_tool(self, mock_browser_state):
        """Test NavigateTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            NavigateTool,
        )

        # Setup mock: browser_state hands back an AsyncMock page.
        mock_page = AsyncMock()
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = NavigateTool()
        result = await tool._async_run("https://example.com")

        assert (
            "Successfully navigated to https://example.com" in result
        )
        mock_page.goto.assert_called_once_with("https://example.com")

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_act_tool(self, mock_browser_state):
        """Test ActTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ActTool

        # Setup mock: page.act returns a fixed confirmation string.
        mock_page = AsyncMock()
        mock_page.act = AsyncMock(return_value="Action completed")
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ActTool()
        result = await tool._async_run("click the button")

        # The tool should report the action and forward it verbatim.
        assert "Action performed" in result
        assert "click the button" in result
        mock_page.act.assert_called_once_with("click the button")

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_extract_tool(self, mock_browser_state):
        """Test ExtractTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            ExtractTool,
        )

        # Setup mock: page.extract returns a plain dict.
        mock_page = AsyncMock()
        mock_page.extract = AsyncMock(
            return_value={
                "title": "Test Page",
                "content": "Test content",
            }
        )
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ExtractTool()
        result = await tool._async_run("extract the page title")

        # Result should be a JSON string of the extracted dict.
        parsed_result = json.loads(result)
        assert parsed_result["title"] == "Test Page"
        assert parsed_result["content"] == "Test content"

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_observe_tool(self, mock_browser_state):
        """Test ObserveTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            ObserveTool,
        )

        # Setup mock: page.observe returns two canned observations.
        mock_page = AsyncMock()
        mock_observations = [
            MockObserveResult("Search input", "#search"),
            MockObserveResult("Submit button", "#submit"),
        ]
        mock_page.observe = AsyncMock(return_value=mock_observations)
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ObserveTool()
        result = await tool._async_run("find the search box")

        # Result should be a JSON string listing both observations.
        parsed_result = json.loads(result)
        assert len(parsed_result) == 2
        assert parsed_result[0]["description"] == "Search input"
        assert parsed_result[0]["selector"] == "#search"
# Test MCP integration
class TestStagehandMCP:
    """Test Stagehand MCP server integration."""

    def test_mcp_agent_initialization(self):
        """Test that MCP agent initializes with correct parameters."""
        from examples.stagehand.stagehand_mcp_agent import (
            StagehandMCPAgent,
        )

        mcp_agent = StagehandMCPAgent(
            agent_name="TestMCPAgent",
            mcp_server_url="http://localhost:3000/sse",
            model_name="gpt-4o-mini",
        )

        # The wrapper should forward its settings to the inner swarms Agent.
        inner = mcp_agent.agent
        assert inner.agent_name == "TestMCPAgent"
        assert inner.mcp_url == "http://localhost:3000/sse"
        assert inner.model_name == "gpt-4o-mini"

    def test_multi_session_swarm_creation(self):
        """Test multi-session browser swarm creation."""
        from examples.stagehand.stagehand_mcp_agent import (
            MultiSessionBrowserSwarm,
        )

        swarm = MultiSessionBrowserSwarm(
            mcp_server_url="http://localhost:3000/sse",
            num_agents=3,
        )

        assert len(swarm.agents) == 3
        # Agents are named by role with a numeric suffix.
        for idx, expected_name in enumerate(
            ["DataExtractor_0", "FormFiller_1", "WebMonitor_2"]
        ):
            assert swarm.agents[idx].agent_name == expected_name

    @patch("swarms.Agent.run")
    def test_task_distribution(self, mock_run):
        """Test task distribution among swarm agents."""
        from examples.stagehand.stagehand_mcp_agent import (
            MultiSessionBrowserSwarm,
        )

        mock_run.return_value = "Task completed"
        swarm = MultiSessionBrowserSwarm(num_agents=2)

        # Three tasks across two agents: each task must run exactly once.
        results = swarm.distribute_tasks(["Task 1", "Task 2", "Task 3"])

        assert len(results) == 3
        assert all(result == "Task completed" for result in results)
        assert mock_run.call_count == 3
# Test multi-agent workflows
class TestMultiAgentWorkflows:
    """Test multi-agent workflow configurations.

    Each test patches ``Stagehand`` with :class:`MockStagehand` so that
    building the workflows never launches a real browser; only the wiring
    (agent counts, nesting, names, flow strings) is inspected.
    """

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_price_comparison_workflow_creation(self):
        """Test creation of price comparison workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_price_comparison_workflow,
        )

        workflow = create_price_comparison_workflow()

        # Should be a SequentialWorkflow with 2 agents
        assert len(workflow.agents) == 2
        # First agent should be a ConcurrentWorkflow
        assert hasattr(workflow.agents[0], "agents")
        # Second agent should be the analysis agent
        assert workflow.agents[1].agent_name == "PriceAnalysisAgent"

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_competitive_analysis_workflow_creation(self):
        """Test creation of competitive analysis workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_competitive_analysis_workflow,
        )

        workflow = create_competitive_analysis_workflow()

        # Should have 3 agents in the rearrange pattern, and the flow
        # string must match the pattern declared in the workflow factory.
        assert len(workflow.agents) == 3
        assert (
            workflow.flow
            == "company_researcher -> social_media_agent -> report_compiler"
        )

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_automated_testing_workflow_creation(self):
        """Test creation of automated testing workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_automated_testing_workflow,
        )

        workflow = create_automated_testing_workflow()

        # Should be a SequentialWorkflow: concurrent testers, then reporter.
        assert len(workflow.agents) == 2
        # First should be concurrent testing
        assert hasattr(workflow.agents[0], "agents")
        assert (
            len(workflow.agents[0].agents) == 3
        )  # UI, Form, Accessibility testers

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_news_aggregation_workflow_creation(self):
        """Test creation of news aggregation workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_news_aggregation_workflow,
        )

        workflow = create_news_aggregation_workflow()

        # Should be a SequentialWorkflow with 3 stages
        assert len(workflow.agents) == 3
        # First stage should be concurrent scrapers
        assert hasattr(workflow.agents[0], "agents")
        assert len(workflow.agents[0].agents) == 3  # 3 news sources
# Integration tests
class TestIntegration:
    """End-to-end integration tests."""

    @pytest.mark.asyncio
    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    async def test_full_browser_automation_flow(self):
        """Test a complete browser automation flow."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        browser_agent = StagehandAgent(
            agent_name="IntegrationTestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Exercise navigate -> extract -> observe in sequence on one agent,
        # checking that each response carries the expected marker key.
        for task, marker in (
            ("Navigate to example.com", "navigated_to"),
            ("Extract all text from the page", "extracted"),
            ("Find all buttons on the page", "observation"),
        ):
            assert marker in browser_agent.run(task)

        # Release the (mocked) browser resources.
        browser_agent.cleanup()
# Allow running this test module directly: `python <this file>` invokes
# pytest on it with verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])

@ -0,0 +1,302 @@
"""
Simple tests for Stagehand Integration with Swarms
=================================================
These tests verify the basic structure and functionality of the
Stagehand integration without requiring external dependencies.
"""
import json
import pytest
from unittest.mock import MagicMock
class TestStagehandIntegrationStructure:
    """Test that integration files have correct structure."""

    @staticmethod
    def _read_example(filename):
        # Helper: load one example file's source text from the examples dir.
        with open(f"examples/stagehand/{filename}", "r") as fh:
            return fh.read()

    def test_examples_directory_exists(self):
        """Test that examples directory structure is correct."""
        import os

        base_path = "examples/stagehand"
        assert os.path.exists(base_path)

        # Every shipped example, plus docs and requirements, must exist.
        for expected in (
            "1_stagehand_wrapper_agent.py",
            "2_stagehand_tools_agent.py",
            "3_stagehand_mcp_agent.py",
            "4_stagehand_multi_agent_workflow.py",
            "README.md",
            "requirements.txt",
        ):
            assert os.path.exists(
                os.path.join(base_path, expected)
            ), f"Missing file: {expected}"

    def test_wrapper_agent_imports(self):
        """Test that wrapper agent has correct imports."""
        source = self._read_example("1_stagehand_wrapper_agent.py")

        # Check for required imports and the wrapper class itself.
        for needle in (
            "from swarms import Agent",
            "import asyncio",
            "import json",
            "class StagehandAgent",
        ):
            assert needle in source

    def test_tools_agent_imports(self):
        """Test that tools agent has correct imports."""
        source = self._read_example("2_stagehand_tools_agent.py")

        # Check for required imports and the function-based tools.
        for needle in (
            "from swarms import Agent",
            "def navigate_browser",
            "def browser_act",
            "def browser_extract",
        ):
            assert needle in source

    def test_mcp_agent_imports(self):
        """Test that MCP agent has correct imports."""
        source = self._read_example("3_stagehand_mcp_agent.py")

        # Check for required imports and the MCP wrapper class.
        for needle in (
            "from swarms import Agent",
            "class StagehandMCPAgent",
            "mcp_url",
        ):
            assert needle in source

    def test_workflow_agent_imports(self):
        """Test that workflow agent has correct imports."""
        source = self._read_example(
            "4_stagehand_multi_agent_workflow.py"
        )

        # Check for required imports (workflow types and AgentRearrange).
        for needle in (
            "from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow",
            "from swarms.structs.agent_rearrange import AgentRearrange",
        ):
            assert needle in source
class TestStagehandMockIntegration:
    """Test Stagehand integration with mocked dependencies."""

    def test_mock_stagehand_initialization(self):
        """Test that Stagehand can be mocked and initialized."""
        # Build the mock hierarchy without importing the real stagehand
        # package: a factory that yields a client exposing init().
        stagehand_factory = MagicMock()
        fake_client = MagicMock()
        fake_client.init = MagicMock()
        stagehand_factory.return_value = fake_client

        fake_config = MagicMock()
        client = stagehand_factory(fake_config)

        # Verify the mock behaves like a constructible client.
        assert client is not None
        assert hasattr(client, "init")

    def test_json_serialization(self):
        """Test JSON serialization for agent responses."""
        # Shape of a typical browser-automation agent response.
        test_data = {
            "task": "Navigate to example.com",
            "status": "completed",
            "data": {
                "navigated_to": "https://example.com",
                "extracted": ["item1", "item2"],
                "action": "navigate",
            },
        }

        # Round-trip through JSON and verify the payload survives intact.
        serialized = json.dumps(test_data, indent=2)
        assert isinstance(serialized, str)

        round_tripped = json.loads(serialized)
        assert round_tripped["task"] == "Navigate to example.com"
        assert round_tripped["status"] == "completed"
        assert len(round_tripped["data"]["extracted"]) == 2

    def test_url_extraction_logic(self):
        """Test URL extraction logic from task strings."""
        import re

        url_pattern = r"https?://[^\s]+"
        domain_pattern = r"(\w+\.\w+)"

        cases = [
            (
                "Navigate to https://example.com",
                ["https://example.com"],
            ),
            ("Go to google.com and search", ["google.com"]),
            (
                "Visit https://github.com/repo",
                ["https://github.com/repo"],
            ),
            ("Open example.org", ["example.org"]),
        ]

        for task, expected in cases:
            # Prefer fully-qualified URLs; fall back to bare domains.
            found = re.findall(url_pattern, task) or re.findall(
                domain_pattern, task
            )
            assert found, f"Failed to extract URL from: {task}"
            assert (
                found[0] in expected
            ), f"Expected {expected}, got {found}"
class TestSwarmsPatternsCompliance:
    """Test compliance with Swarms framework patterns."""

    @staticmethod
    def _source_of(filename):
        # Helper: read one example file's source text.
        with open(f"examples/stagehand/{filename}", "r") as fh:
            return fh.read()

    def test_agent_inheritance_pattern(self):
        """Test that wrapper agent follows Swarms Agent inheritance pattern."""
        source = self._source_of("1_stagehand_wrapper_agent.py")

        # The wrapper must subclass SwarmsAgent and implement run().
        for needle in (
            "class StagehandAgent(SwarmsAgent):",
            "def run(self, task: str",
            "return",
        ):
            assert needle in source

    def test_tools_pattern(self):
        """Test that tools follow Swarms function-based pattern."""
        source = self._source_of("2_stagehand_tools_agent.py")

        # Tools are plain functions with typed string signatures.
        for needle in (
            "def navigate_browser(url: str) -> str:",
            "def browser_act(action: str) -> str:",
            "def browser_extract(query: str) -> str:",
            "def browser_observe(query: str) -> str:",
        ):
            assert needle in source

    def test_mcp_integration_pattern(self):
        """Test MCP integration follows Swarms pattern."""
        source = self._source_of("3_stagehand_mcp_agent.py")

        # The agent must be wired to an MCP server URL.
        for needle in ("mcp_url=", "Agent("):
            assert needle in source

    def test_workflow_patterns(self):
        """Test workflow patterns are properly used."""
        source = self._source_of("4_stagehand_multi_agent_workflow.py")

        # All three orchestration structures must appear in the example.
        for needle in (
            "SequentialWorkflow",
            "ConcurrentWorkflow",
            "AgentRearrange",
        ):
            assert needle in source
class TestDocumentationAndExamples:
    """Test documentation and example completeness."""

    def test_readme_completeness(self):
        """Test that README contains essential information."""
        with open("examples/stagehand/README.md", "r") as fh:
            readme = fh.read()

        # The README must carry all the standard sections.
        for section in (
            "# Stagehand Browser Automation Integration",
            "## Overview",
            "## Examples",
            "## Setup",
            "## Use Cases",
            "## Best Practices",
        ):
            assert section in readme, f"Missing section: {section}"

    def test_requirements_file(self):
        """Test that requirements file has necessary dependencies."""
        with open("examples/stagehand/requirements.txt", "r") as fh:
            requirements = fh.read()

        for dep in (
            "swarms",
            "stagehand",
            "python-dotenv",
            "pydantic",
            "loguru",
        ):
            assert dep in requirements, f"Missing dependency: {dep}"

    def test_example_files_have_docstrings(self):
        """Test that example files have proper docstrings."""
        for file_path in (
            "examples/stagehand/1_stagehand_wrapper_agent.py",
            "examples/stagehand/2_stagehand_tools_agent.py",
            "examples/stagehand/3_stagehand_mcp_agent.py",
            "examples/stagehand/4_stagehand_multi_agent_workflow.py",
        ):
            with open(file_path, "r") as fh:
                source = fh.read()

            # Module docstring near the top and a runnable main block.
            assert (
                '"""' in source[:500]
            ), f"Missing docstring in {file_path}"
            assert (
                'if __name__ == "__main__":' in source
            ), f"Missing main block in {file_path}"
# Allow running this test module directly: `python <this file>` invokes
# pytest on it with verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
Loading…
Cancel
Save