commit c0c9b7201a
@@ -0,0 +1,265 @@

"""
Stagehand Browser Automation Agent for Swarms
=============================================

This example demonstrates how to create a Swarms-compatible agent
that wraps Stagehand's browser automation capabilities.

The StagehandAgent class inherits from the Swarms Agent base class
and implements browser automation through natural language commands.
"""

import asyncio
import json
import os
from typing import Any, Dict, Optional

from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel, Field

from swarms import Agent as SwarmsAgent
from stagehand import Stagehand, StagehandConfig

load_dotenv()


class WebData(BaseModel):
    """Schema for extracted web data."""

    url: str = Field(..., description="The URL of the page")
    title: str = Field(..., description="Page title")
    content: str = Field(..., description="Extracted content")
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )

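# Note: WebData is an illustrative schema; the examples below pass free-form
# prompts to page.extract(), but a Pydantic model like this could be supplied
# where strictly structured output is needed.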


class StagehandAgent(SwarmsAgent):
    """
    A Swarms agent that integrates Stagehand for browser automation.

    This agent can navigate websites, extract data, perform actions,
    and observe page elements using natural language instructions.
    """

    def __init__(
        self,
        agent_name: str = "StagehandBrowserAgent",
        browserbase_api_key: Optional[str] = None,
        browserbase_project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
        env: str = "LOCAL",  # LOCAL or BROWSERBASE
        *args,
        **kwargs,
    ):
        """
        Initialize the StagehandAgent.

        Args:
            agent_name: Name of the agent
            browserbase_api_key: API key for Browserbase (if using cloud)
            browserbase_project_id: Project ID for Browserbase
            model_name: LLM model to use
            model_api_key: API key for the model
            env: Environment - LOCAL or BROWSERBASE
        """
        # Don't pass stagehand-specific args to parent
        super().__init__(agent_name=agent_name, *args, **kwargs)

        self.stagehand_config = StagehandConfig(
            env=env,
            api_key=browserbase_api_key
            or os.getenv("BROWSERBASE_API_KEY"),
            project_id=browserbase_project_id
            or os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name=model_name,
            model_api_key=model_api_key
            or os.getenv("OPENAI_API_KEY"),
        )
        self.stagehand = None
        self._initialized = False

    async def _init_stagehand(self):
        """Initialize Stagehand instance."""
        if not self._initialized:
            self.stagehand = Stagehand(self.stagehand_config)
            await self.stagehand.init()
            self._initialized = True
            logger.info(
                f"Stagehand initialized for {self.agent_name}"
            )

    async def _close_stagehand(self):
        """Close Stagehand instance."""
        if self.stagehand and self._initialized:
            await self.stagehand.close()
            self._initialized = False
            logger.info(f"Stagehand closed for {self.agent_name}")

    def run(self, task: str, *args, **kwargs) -> str:
        """
        Execute a browser automation task.

        The task string should contain instructions like:
        - "Navigate to example.com and extract the main content"
        - "Go to google.com and search for 'AI agents'"
        - "Extract all company names from https://ycombinator.com"

        Args:
            task: Natural language description of the browser task

        Returns:
            String result of the task execution
        """
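        # Note: asyncio.run() spins up a fresh event loop on every call, while
        # the Stagehand instance is kept alive for follow-up tasks.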
        return asyncio.run(self._async_run(task, *args, **kwargs))

    async def _async_run(self, task: str, *args, **kwargs) -> str:
        """Async implementation of run method."""
        try:
            await self._init_stagehand()

            # Parse the task to determine actions
            result = await self._execute_browser_task(task)

            return json.dumps(result, indent=2)
        except Exception as e:
            logger.error(f"Error in browser task: {str(e)}")
            return f"Error executing browser task: {str(e)}"
        finally:
            # Keep browser open for potential follow-up tasks
            pass

    async def _execute_browser_task(
        self, task: str
    ) -> Dict[str, Any]:
        """
        Execute a browser task based on natural language instructions.

        This method interprets the task and calls appropriate Stagehand methods.
        """
        page = self.stagehand.page
        result = {"task": task, "status": "completed", "data": {}}

        # Determine if task involves navigation
        if any(
            keyword in task.lower()
            for keyword in ["navigate", "go to", "visit", "open"]
        ):
            # Extract URL from task
            import re

            url_pattern = r"https?://[^\s]+"
            urls = re.findall(url_pattern, task)
            if not urls and any(
                domain in task for domain in [".com", ".org", ".net"]
            ):
                # Try to extract domain names
                domain_pattern = r"(\w+\.\w+)"
                domains = re.findall(domain_pattern, task)
                if domains:
                    urls = [f"https://{domain}" for domain in domains]

            if urls:
                url = urls[0]
                await page.goto(url)
                result["data"]["navigated_to"] = url
                logger.info(f"Navigated to {url}")

        # Determine action type
        if "extract" in task.lower():
            # Perform extraction
            extraction_prompt = task.replace("extract", "").strip()
            extracted = await page.extract(extraction_prompt)
            result["data"]["extracted"] = extracted
            result["action"] = "extract"

        elif "click" in task.lower() or "press" in task.lower():
            # Perform action
            action_result = await page.act(task)
            result["data"]["action_performed"] = str(action_result)
            result["action"] = "act"

        elif "search" in task.lower():
            # Perform search action
            search_query = (
                task.split("search for")[-1].strip().strip("'\"")
            )
            # First, find the search box
            search_box = await page.observe(
                "find the search input field"
            )
            if search_box:
                # Click on search box and type
                await page.act(f"click on {search_box[0]}")
                await page.act(f"type '{search_query}'")
                await page.act("press Enter")
            result["data"]["search_query"] = search_query
            result["action"] = "search"

        elif "observe" in task.lower() or "find" in task.lower():
            # Perform observation
            observation = await page.observe(task)
            result["data"]["observation"] = [
                {
                    "description": obs.description,
                    "selector": obs.selector,
                }
                for obs in observation
            ]
            result["action"] = "observe"

        else:
            # General action
            action_result = await page.act(task)
            result["data"]["action_result"] = str(action_result)
            result["action"] = "general"

        return result

    def cleanup(self):
        """Clean up browser resources."""
        if self._initialized:
            asyncio.run(self._close_stagehand())

    def __del__(self):
        """Ensure browser is closed on deletion."""
        self.cleanup()


# Example usage
if __name__ == "__main__":
    # Create a Stagehand browser agent
    browser_agent = StagehandAgent(
        agent_name="WebScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",  # Use LOCAL for Playwright, BROWSERBASE for cloud
    )

    # Example 1: Navigate and extract data
    print("Example 1: Basic navigation and extraction")
    result1 = browser_agent.run(
        "Navigate to https://news.ycombinator.com and extract the titles of the top 5 stories"
    )
    print(result1)
    print("\n" + "=" * 50 + "\n")

    # Example 2: Perform a search
    print("Example 2: Search on a website")
    result2 = browser_agent.run(
        "Go to google.com and search for 'Swarms AI framework'"
    )
    print(result2)
    print("\n" + "=" * 50 + "\n")

    # Example 3: Extract structured data
    print("Example 3: Extract specific information")
    result3 = browser_agent.run(
        "Navigate to https://example.com and extract the main heading and first paragraph"
    )
    print(result3)

    # Clean up
    browser_agent.cleanup()

@@ -0,0 +1,397 @@

"""
Stagehand Tools for Swarms Agent
================================

This example demonstrates how to create Stagehand browser automation tools
that can be used by a standard Swarms Agent. Each Stagehand method (act,
extract, observe) becomes a separate tool that the agent can use.

This approach gives the agent more fine-grained control over browser
automation tasks.
"""

import asyncio
import json
import os
from typing import Optional

from dotenv import load_dotenv
from loguru import logger

from swarms import Agent
from stagehand import Stagehand, StagehandConfig

load_dotenv()


class BrowserState:
    """Singleton to manage browser state across tools."""

    _instance = None
    _stagehand = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def init_browser(
        self,
        env: str = "LOCAL",
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
    ):
        """Initialize the browser if not already initialized."""
        if not self._initialized:
            config = StagehandConfig(
                env=env,
                api_key=api_key or os.getenv("BROWSERBASE_API_KEY"),
                project_id=project_id
                or os.getenv("BROWSERBASE_PROJECT_ID"),
                model_name=model_name,
                model_api_key=model_api_key
                or os.getenv("OPENAI_API_KEY"),
            )
            self._stagehand = Stagehand(config)
            await self._stagehand.init()
            self._initialized = True
            logger.info("Stagehand browser initialized")

    async def get_page(self):
        """Get the current page instance."""
        if not self._initialized:
            raise RuntimeError(
                "Browser not initialized. Call init_browser first."
            )
        return self._stagehand.page

    async def close(self):
        """Close the browser."""
        if self._initialized and self._stagehand:
            await self._stagehand.close()
            self._initialized = False
            logger.info("Stagehand browser closed")


# Browser state instance
browser_state = BrowserState()
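# BrowserState is a singleton, so every tool below shares this one instance
# and therefore operates on the same browser session.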


def navigate_browser(url: str) -> str:
    """
    Navigate to a URL in the browser.

    Args:
        url (str): The URL to navigate to. Should be a valid URL starting with http:// or https://.
            If no protocol is provided, https:// will be added automatically.

    Returns:
        str: Success message with the URL navigated to, or error message if navigation fails

    Raises:
        RuntimeError: If browser initialization fails
        Exception: If navigation to the URL fails

    Example:
        >>> result = navigate_browser("https://example.com")
        >>> print(result)
        "Successfully navigated to https://example.com"

        >>> result = navigate_browser("google.com")
        >>> print(result)
        "Successfully navigated to https://google.com"
    """
    return asyncio.run(_navigate_browser_async(url))


async def _navigate_browser_async(url: str) -> str:
    """Async implementation of navigate_browser."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()

        # Ensure URL has protocol
        if not url.startswith(("http://", "https://")):
            url = f"https://{url}"

        await page.goto(url)
        return f"Successfully navigated to {url}"
    except Exception as e:
        logger.error(f"Navigation error: {str(e)}")
        return f"Failed to navigate to {url}: {str(e)}"


def browser_act(action: str) -> str:
    """
    Perform an action on the current web page using natural language.

    Args:
        action (str): Natural language description of the action to perform.
            Examples: 'click the submit button', 'type hello@example.com in the email field',
            'scroll down', 'press Enter', 'select option from dropdown'

    Returns:
        str: Description of the action performed and its result, or an error
            message if the action fails

    Raises:
        RuntimeError: If browser is not initialized or page is not available
        Exception: If the action cannot be performed on the current page

    Example:
        >>> result = browser_act("click the submit button")
        >>> print(result)
        "Action performed: click the submit button. Result: clicked successfully"

        >>> result = browser_act("type hello@example.com in the email field")
        >>> print(result)
        "Action performed: type hello@example.com in the email field. Result: text entered"
    """
    return asyncio.run(_browser_act_async(action))


async def _browser_act_async(action: str) -> str:
    """Async implementation of browser_act."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()

        result = await page.act(action)
        return f"Action performed: {action}. Result: {result}"
    except Exception as e:
        logger.error(f"Action error: {str(e)}")
        return f"Failed to perform action '{action}': {str(e)}"


def browser_extract(query: str) -> str:
    """
    Extract information from the current web page using natural language.

    Args:
        query (str): Natural language description of what information to extract.
            Examples: 'extract all email addresses', 'get the main article text',
            'find all product prices', 'extract the page title and meta description'

    Returns:
        str: JSON formatted string containing the extracted information, or error message if extraction fails

    Raises:
        RuntimeError: If browser is not initialized or page is not available
        Exception: If extraction fails due to page content or parsing issues

    Example:
        >>> result = browser_extract("extract all email addresses")
        >>> print(result)
        '["contact@example.com", "support@example.com"]'

        >>> result = browser_extract("get the main article text")
        >>> print(result)
        '{"title": "Article Title", "content": "Article content..."}'
    """
    return asyncio.run(_browser_extract_async(query))


async def _browser_extract_async(query: str) -> str:
    """Async implementation of browser_extract."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()

        extracted = await page.extract(query)

        # Convert to JSON string for agent consumption
        if isinstance(extracted, (dict, list)):
            return json.dumps(extracted, indent=2)
        else:
            return str(extracted)
    except Exception as e:
        logger.error(f"Extraction error: {str(e)}")
        return f"Failed to extract '{query}': {str(e)}"


def browser_observe(query: str) -> str:
    """
    Observe and find elements on the current web page using natural language.

    Args:
        query (str): Natural language description of elements to find.
            Examples: 'find the search box', 'locate the submit button',
            'find all navigation links', 'observe form elements'

    Returns:
        str: JSON formatted string containing information about found elements including
            their descriptions, selectors, and interaction methods

    Raises:
        RuntimeError: If browser is not initialized or page is not available
        Exception: If observation fails due to page structure or element detection issues

    Example:
        >>> result = browser_observe("find the search box")
        >>> print(result)
        '[{"description": "Search input field", "selector": "#search", "method": "input"}]'

        >>> result = browser_observe("locate the submit button")
        >>> print(result)
        '[{"description": "Submit button", "selector": "button[type=submit]", "method": "click"}]'
    """
    return asyncio.run(_browser_observe_async(query))


async def _browser_observe_async(query: str) -> str:
    """Async implementation of browser_observe."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()

        observations = await page.observe(query)

        # Format observations for readability
        result = []
        for obs in observations:
            result.append(
                {
                    "description": obs.description,
                    "selector": obs.selector,
                    "method": obs.method,
                }
            )

        return json.dumps(result, indent=2)
    except Exception as e:
        logger.error(f"Observation error: {str(e)}")
        return f"Failed to observe '{query}': {str(e)}"


def browser_screenshot(filename: str = "screenshot.png") -> str:
    """
    Take a screenshot of the current web page.

    Args:
        filename (str, optional): The filename to save the screenshot to.
            Defaults to "screenshot.png".
            .png extension will be added automatically if not provided.

    Returns:
        str: Success message with the filename where screenshot was saved,
            or error message if screenshot fails

    Raises:
        RuntimeError: If browser is not initialized or page is not available
        Exception: If screenshot capture or file saving fails

    Example:
        >>> result = browser_screenshot()
        >>> print(result)
        "Screenshot saved to screenshot.png"

        >>> result = browser_screenshot("page_capture.png")
        >>> print(result)
        "Screenshot saved to page_capture.png"
    """
    return asyncio.run(_browser_screenshot_async(filename))


async def _browser_screenshot_async(filename: str) -> str:
    """Async implementation of browser_screenshot."""
    try:
        await browser_state.init_browser()
        page = await browser_state.get_page()

        # Ensure .png extension
        if not filename.endswith(".png"):
            filename += ".png"

        # Get the underlying Playwright page
        playwright_page = page.page
        await playwright_page.screenshot(path=filename)

        return f"Screenshot saved to {filename}"
    except Exception as e:
        logger.error(f"Screenshot error: {str(e)}")
        return f"Failed to take screenshot: {str(e)}"


def close_browser() -> str:
    """
    Close the browser when done with automation tasks.

    Returns:
        str: Success message if browser is closed successfully,
            or error message if closing fails

    Raises:
        Exception: If browser closing process encounters errors

    Example:
        >>> result = close_browser()
        >>> print(result)
        "Browser closed successfully"
    """
    return asyncio.run(_close_browser_async())


async def _close_browser_async() -> str:
    """Async implementation of close_browser."""
    try:
        await browser_state.close()
        return "Browser closed successfully"
    except Exception as e:
        logger.error(f"Close browser error: {str(e)}")
        return f"Failed to close browser: {str(e)}"


# Example usage
if __name__ == "__main__":
    # Create a Swarms agent with browser tools
    browser_agent = Agent(
        agent_name="BrowserAutomationAgent",
        model_name="gpt-4o-mini",
        max_loops=1,
        tools=[
            navigate_browser,
            browser_act,
            browser_extract,
            browser_observe,
            browser_screenshot,
            close_browser,
        ],
        system_prompt="""You are a web browser automation specialist. You can:
1. Navigate to websites using the navigate_browser tool
2. Perform actions like clicking and typing using the browser_act tool
3. Extract information from pages using the browser_extract tool
4. Find and observe elements using the browser_observe tool
5. Take screenshots using the browser_screenshot tool
6. Close the browser when done using the close_browser tool

Always start by navigating to a URL before trying to interact with a page.
Be specific in your actions and extractions. When done with tasks, close the browser.""",
    )

    # Example 1: Research task
    print("Example 1: Automated web research")
    result1 = browser_agent.run(
        "Go to hackernews (news.ycombinator.com) and extract the titles of the top 5 stories. Then take a screenshot."
    )
    print(result1)
    print("\n" + "=" * 50 + "\n")

    # Example 2: Search task
    print("Example 2: Perform a web search")
    result2 = browser_agent.run(
        "Navigate to google.com, search for 'Python web scraping best practices', and extract the first 3 search result titles"
    )
    print(result2)
    print("\n" + "=" * 50 + "\n")

    # Example 3: Form interaction
    print("Example 3: Interact with a form")
    result3 = browser_agent.run(
        "Go to example.com and observe what elements are on the page. Then extract all the text content."
    )
    print(result3)

    # Clean up
    browser_agent.run("Close the browser")

@@ -0,0 +1,263 @@

"""
Stagehand MCP Server Integration with Swarms
============================================

This example demonstrates how to use the Stagehand MCP (Model Context Protocol)
server with Swarms agents. The MCP server provides browser automation capabilities
as standardized tools that can be discovered and used by agents.

Prerequisites:
1. Install and run the Stagehand MCP server:
   cd stagehand-mcp-server
   npm install
   npm run build
   npm start

2. The server will start on http://localhost:3000/sse

Features:
- Automatic tool discovery from MCP server
- Multi-session browser management
- Built-in screenshot resources
- Prompt templates for common tasks
"""

from typing import List

from dotenv import load_dotenv
from loguru import logger

from swarms import Agent

load_dotenv()


class StagehandMCPAgent:
    """
    A Swarms agent that connects to the Stagehand MCP server
    for browser automation capabilities.
    """

    def __init__(
        self,
        agent_name: str = "StagehandMCPAgent",
        mcp_server_url: str = "http://localhost:3000/sse",
        model_name: str = "gpt-4o-mini",
        max_loops: int = 1,
    ):
        """
        Initialize the Stagehand MCP Agent.

        Args:
            agent_name: Name of the agent
            mcp_server_url: URL of the Stagehand MCP server
            model_name: LLM model to use
            max_loops: Maximum number of reasoning loops
        """
        self.agent = Agent(
            agent_name=agent_name,
            model_name=model_name,
            max_loops=max_loops,
            # Connect to the Stagehand MCP server
            mcp_url=mcp_server_url,
            system_prompt="""You are a web browser automation specialist with access to Stagehand MCP tools.

Available tools from the MCP server:
- navigate: Navigate to a URL
- act: Perform actions on web pages (click, type, etc.)
- extract: Extract data from web pages
- observe: Find and observe elements on pages
- screenshot: Take screenshots
- createSession: Create new browser sessions for parallel tasks
- listSessions: List active browser sessions
- closeSession: Close browser sessions

For multi-page workflows, you can create multiple sessions.
Always be specific in your actions and extractions.
Remember to close sessions when done with them.""",
            verbose=True,
        )

    def run(self, task: str) -> str:
        """Run a browser automation task."""
        return self.agent.run(task)


class MultiSessionBrowserSwarm:
    """
    A multi-agent swarm that uses multiple browser sessions
    for parallel web automation tasks.
    """

    def __init__(
        self,
        mcp_server_url: str = "http://localhost:3000/sse",
        num_agents: int = 3,
    ):
        """
        Initialize a swarm of browser automation agents.

        Args:
            mcp_server_url: URL of the Stagehand MCP server
            num_agents: Number of agents to create
        """
        self.agents = []

        # Create specialized agents for different tasks
        agent_roles = [
            (
                "DataExtractor",
                "You specialize in extracting structured data from websites.",
            ),
            (
                "FormFiller",
                "You specialize in filling out forms and interacting with web applications.",
            ),
            (
                "WebMonitor",
                "You specialize in monitoring websites for changes and capturing screenshots.",
            ),
        ]

        for i in range(min(num_agents, len(agent_roles))):
            name, specialization = agent_roles[i]
            agent = Agent(
                agent_name=f"{name}_{i}",
                model_name="gpt-4o-mini",
                max_loops=1,
                mcp_url=mcp_server_url,
                system_prompt=f"""You are a web browser automation specialist. {specialization}

You have access to Stagehand MCP tools including:
- createSession: Create a new browser session
- navigate_session: Navigate to URLs in a specific session
- act_session: Perform actions in a specific session
- extract_session: Extract data from a specific session
- observe_session: Observe elements in a specific session
- closeSession: Close a session when done

Always create your own session for tasks to work independently from other agents.""",
                verbose=True,
            )
            self.agents.append(agent)

    def distribute_tasks(self, tasks: List[str]) -> List[str]:
        """Distribute tasks among agents."""
        results = []

        # Distribute tasks round-robin among agents
        for i, task in enumerate(tasks):
            agent_idx = i % len(self.agents)
            agent = self.agents[agent_idx]

            logger.info(
                f"Assigning task to {agent.agent_name}: {task}"
            )
            result = agent.run(task)
            results.append(result)

        return results


# Example usage
if __name__ == "__main__":
    print("=" * 70)
    print("Stagehand MCP Server Integration Examples")
    print("=" * 70)
    print(
        "\nMake sure the Stagehand MCP server is running on http://localhost:3000/sse"
    )
    print("Run: cd stagehand-mcp-server && npm start\n")

    # Example 1: Single agent with MCP tools
    print("\nExample 1: Single Agent with MCP Tools")
    print("-" * 40)

    mcp_agent = StagehandMCPAgent(
        agent_name="WebResearchAgent",
        mcp_server_url="http://localhost:3000/sse",
    )

    # Research task using MCP tools
    result1 = mcp_agent.run(
        """Navigate to news.ycombinator.com and extract the following:
        1. The titles of the top 5 stories
        2. Their points/scores
        3. Number of comments for each
        Then take a screenshot of the page."""
    )
    print(f"Result: {result1}")

    print("\n" + "=" * 70 + "\n")

    # Example 2: Multi-session parallel browsing
    print("Example 2: Multi-Session Parallel Browsing")
    print("-" * 40)

    parallel_agent = StagehandMCPAgent(
        agent_name="ParallelBrowserAgent",
        mcp_server_url="http://localhost:3000/sse",
    )

    result2 = parallel_agent.run(
        """Create 3 browser sessions and perform these tasks in parallel:
        1. Session 1: Go to github.com/trending and extract the top 3 trending repositories
        2. Session 2: Go to reddit.com/r/programming and extract the top 3 posts
        3. Session 3: Go to stackoverflow.com and extract the featured questions

        After extracting data from all sessions, close them."""
    )
    print(f"Result: {result2}")

    print("\n" + "=" * 70 + "\n")

    # Example 3: Multi-agent browser swarm
    print("Example 3: Multi-Agent Browser Swarm")
    print("-" * 40)

    # Create a swarm of specialized browser agents
    browser_swarm = MultiSessionBrowserSwarm(
        mcp_server_url="http://localhost:3000/sse",
        num_agents=3,
    )

    # Define tasks for the swarm
    swarm_tasks = [
        "Create a session, navigate to python.org, and extract information about the latest Python version and its key features",
        "Create a session, go to npmjs.com, search for 'stagehand', and extract information about the package including version and description",
        "Create a session, visit playwright.dev, and extract the main features and benefits listed on the homepage",
    ]

    print("Distributing tasks to browser swarm...")
    swarm_results = browser_swarm.distribute_tasks(swarm_tasks)

    for i, result in enumerate(swarm_results):
        print(f"\nTask {i+1} Result: {result}")

    print("\n" + "=" * 70 + "\n")

    # Example 4: Complex workflow with session management
    print("Example 4: Complex Multi-Page Workflow")
    print("-" * 40)

    workflow_agent = StagehandMCPAgent(
        agent_name="WorkflowAgent",
        mcp_server_url="http://localhost:3000/sse",
        max_loops=2,  # Allow more complex reasoning
    )

    result4 = workflow_agent.run(
        """Perform a comprehensive analysis of AI frameworks:
        1. Create a new session
        2. Navigate to github.com/huggingface/transformers and extract the star count and latest release info
        3. In the same session, navigate to github.com/openai/gpt-3 and extract similar information
        4. Navigate to github.com/anthropics/anthropic-sdk-python and extract repository statistics
        5. Take screenshots of each repository page
        6. Compile a comparison report of all three repositories
        7. Close the session when done"""
    )
    print(f"Result: {result4}")

    print("\n" + "=" * 70)
    print("All examples completed!")
    print("=" * 70)

@@ -0,0 +1,371 @@

"""
Stagehand Multi-Agent Browser Automation Workflows
==================================================

This example demonstrates advanced multi-agent workflows using Stagehand
for complex browser automation scenarios. It shows how multiple agents
can work together to accomplish sophisticated web tasks.

Use cases:
1. E-commerce price monitoring across multiple sites
2. Competitive analysis and market research
3. Automated testing and validation workflows
4. Data aggregation from multiple sources
"""

from datetime import datetime
from typing import Dict, List, Optional

from dotenv import load_dotenv
from pydantic import BaseModel, Field

from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow
from swarms.structs.agent_rearrange import AgentRearrange
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

load_dotenv()


# Pydantic models for structured data
class ProductInfo(BaseModel):
    """Product information schema."""

    name: str = Field(..., description="Product name")
    price: float = Field(..., description="Product price")
    availability: str = Field(..., description="Availability status")
    url: str = Field(..., description="Product URL")
    screenshot_path: Optional[str] = Field(
        None, description="Screenshot file path"
    )


class MarketAnalysis(BaseModel):
    """Market analysis report schema."""

    timestamp: datetime = Field(default_factory=datetime.now)
    products: List[ProductInfo] = Field(
        ..., description="List of products analyzed"
    )
    price_range: Dict[str, float] = Field(
        ..., description="Min and max prices"
    )
    recommendations: List[str] = Field(
        ..., description="Analysis recommendations"
    )


# Specialized browser agents
class ProductScraperAgent(StagehandAgent):
    """Specialized agent for scraping product information."""

    def __init__(self, site_name: str, *args, **kwargs):
        super().__init__(
            agent_name=f"ProductScraper_{site_name}", *args, **kwargs
        )
        self.site_name = site_name


class PriceMonitorAgent(StagehandAgent):
    """Specialized agent for monitoring price changes."""

    def __init__(self, *args, **kwargs):
        super().__init__(
            agent_name="PriceMonitorAgent", *args, **kwargs
        )


# Example 1: E-commerce Price Comparison Workflow
def create_price_comparison_workflow():
    """
    Create a workflow that compares prices across multiple e-commerce sites.
    """

    # Create specialized agents for different sites
    amazon_agent = StagehandAgent(
        agent_name="AmazonScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    ebay_agent = StagehandAgent(
        agent_name="EbayScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    analysis_agent = Agent(
        agent_name="PriceAnalysisAgent",
        model_name="gpt-4o-mini",
        system_prompt="""You are a price analysis expert. Analyze product prices from multiple sources
        and provide insights on the best deals, price trends, and recommendations.
        Focus on value for money and highlight any significant price differences.""",
    )

    # Create concurrent workflow for parallel scraping
    scraping_workflow = ConcurrentWorkflow(
        agents=[amazon_agent, ebay_agent],
        max_loops=1,
        verbose=True,
    )

    # Create sequential workflow: scrape -> analyze
    full_workflow = SequentialWorkflow(
        agents=[scraping_workflow, analysis_agent],
        max_loops=1,
        verbose=True,
    )

    return full_workflow


# Example 2: Competitive Analysis Workflow
def create_competitive_analysis_workflow():
    """
    Create a workflow for competitive analysis across multiple company websites.
    """

    # Agent for extracting company information
    company_researcher = StagehandAgent(
        agent_name="CompanyResearchAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for analyzing social media presence
    social_media_agent = StagehandAgent(
        agent_name="SocialMediaAnalysisAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for compiling competitive analysis report
    report_compiler = Agent(
        agent_name="CompetitiveAnalysisReporter",
        model_name="gpt-4o-mini",
        system_prompt="""You are a competitive analysis expert. Compile comprehensive reports
        based on company information and social media presence data. Identify strengths,
        weaknesses, and market positioning for each company.""",
    )

    # Create agent rearrange for flexible routing
    # AgentRearrange matches flow entries against each agent's agent_name,
    # so the pattern must use the names assigned above.
    workflow_pattern = (
        "CompanyResearchAgent -> SocialMediaAnalysisAgent"
        " -> CompetitiveAnalysisReporter"
    )

    competitive_workflow = AgentRearrange(
        agents=[
            company_researcher,
            social_media_agent,
            report_compiler,
        ],
        flow=workflow_pattern,
        verbose=True,
    )

    return competitive_workflow


# Example 3: Automated Testing Workflow
def create_automated_testing_workflow():
    """
    Create a workflow for automated web application testing.
    """

    # Agent for UI testing
    ui_tester = StagehandAgent(
        agent_name="UITestingAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for form validation testing
    form_tester = StagehandAgent(
        agent_name="FormValidationAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for accessibility testing
    accessibility_tester = StagehandAgent(
        agent_name="AccessibilityTestingAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for compiling test results
    test_reporter = Agent(
        agent_name="TestReportCompiler",
        model_name="gpt-4o-mini",
        system_prompt="""You are a QA test report specialist. Compile test results from
        UI, form validation, and accessibility testing into a comprehensive report.
        Highlight any failures, warnings, and provide recommendations for fixes.""",
    )

    # Concurrent testing followed by report generation
    testing_workflow = ConcurrentWorkflow(
        agents=[ui_tester, form_tester, accessibility_tester],
        max_loops=1,
        verbose=True,
    )

    full_test_workflow = SequentialWorkflow(
        agents=[testing_workflow, test_reporter],
        max_loops=1,
        verbose=True,
    )

    return full_test_workflow


# Example 4: News Aggregation and Sentiment Analysis
def create_news_aggregation_workflow():
    """
    Create a workflow for news aggregation and sentiment analysis.
    """

    # Multiple news scraper agents
    news_scrapers = []
    news_sites = [
        ("TechCrunch", "https://techcrunch.com"),
        ("HackerNews", "https://news.ycombinator.com"),
        ("Reddit", "https://reddit.com/r/technology"),
    ]

    for site_name, url in news_sites:
        scraper = StagehandAgent(
            agent_name=f"{site_name}Scraper",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        news_scrapers.append(scraper)

    # Sentiment analysis agent
    sentiment_analyzer = Agent(
        agent_name="SentimentAnalyzer",
        model_name="gpt-4o-mini",
        system_prompt="""You are a sentiment analysis expert. Analyze news articles and posts
        to determine overall sentiment (positive, negative, neutral) and identify key themes
        and trends in the technology sector.""",
    )

    # Trend identification agent
    trend_identifier = Agent(
        agent_name="TrendIdentifier",
        model_name="gpt-4o-mini",
        system_prompt="""You are a trend analysis expert. Based on aggregated news and sentiment
        data, identify emerging trends, hot topics, and potential market movements in the
        technology sector.""",
    )

    # Create workflow: parallel scraping -> sentiment analysis -> trend identification
    scraping_workflow = ConcurrentWorkflow(
        agents=news_scrapers,
        max_loops=1,
        verbose=True,
    )

    analysis_workflow = SequentialWorkflow(
        agents=[
            scraping_workflow,
            sentiment_analyzer,
            trend_identifier,
        ],
        max_loops=1,
        verbose=True,
    )

    return analysis_workflow


# Main execution examples
if __name__ == "__main__":
    print("=" * 70)
    print("Stagehand Multi-Agent Workflow Examples")
    print("=" * 70)

    # Example 1: Price Comparison
    print("\nExample 1: E-commerce Price Comparison")
    print("-" * 40)

    price_workflow = create_price_comparison_workflow()

    # Search for a specific product across multiple sites
    price_result = price_workflow.run(
        """Search for 'iPhone 15 Pro Max 256GB' on:
        1. Amazon - extract price, availability, and seller information
        2. eBay - extract price range, number of listings, and average price
        Take screenshots of search results from both sites.
        Compare the prices and provide recommendations on where to buy."""
    )
    print(f"Price Comparison Result:\n{price_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 2: Competitive Analysis
    print("Example 2: Competitive Analysis")
    print("-" * 40)

    competitive_workflow = create_competitive_analysis_workflow()

    competitive_result = competitive_workflow.run(
        """Analyze these three AI companies:
        1. OpenAI - visit openai.com and extract mission, products, and recent announcements
        2. Anthropic - visit anthropic.com and extract their AI safety approach and products
        3. DeepMind - visit deepmind.com and extract research focus and achievements

        Then check their Twitter/X presence and recent posts.
        Compile a competitive analysis report comparing their market positioning."""
    )
    print(f"Competitive Analysis Result:\n{competitive_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 3: Automated Testing
    print("Example 3: Automated Web Testing")
    print("-" * 40)

    testing_workflow = create_automated_testing_workflow()

    test_result = testing_workflow.run(
        """Test the website example.com:
        1. UI Testing: Check if all main navigation links work, images load, and layout is responsive
        2. Form Testing: If there are any forms, test with valid and invalid inputs
        3. Accessibility: Check for alt texts, ARIA labels, and keyboard navigation

        Take screenshots of any issues found and compile a comprehensive test report."""
    )
    print(f"Test Results:\n{test_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 4: News Aggregation
    print("Example 4: Tech News Aggregation and Analysis")
    print("-" * 40)

    news_workflow = create_news_aggregation_workflow()

    news_result = news_workflow.run(
        """For each news source:
        1. TechCrunch: Extract the top 5 headlines about AI or machine learning
        2. HackerNews: Extract the top 5 posts related to AI/ML with most points
        3. Reddit r/technology: Extract top 5 posts about AI from the past week

        Analyze sentiment and identify emerging trends in AI technology."""
    )
    print(f"News Analysis Result:\n{news_result}")

    # Cleanup all browser instances
    print("\n" + "=" * 70)
    print("Cleaning up browser instances...")

    # Clean up agents
    for agent in price_workflow.agents:
        if isinstance(agent, StagehandAgent):
            agent.cleanup()
        elif hasattr(agent, "agents"):  # For nested workflows
            for sub_agent in agent.agents:
                if isinstance(sub_agent, StagehandAgent):
                    sub_agent.cleanup()

    print("All workflows completed!")
    print("=" * 70)

@@ -0,0 +1,249 @@

# Stagehand Browser Automation Integration for Swarms

This directory contains examples demonstrating how to integrate [Stagehand](https://github.com/browserbase/stagehand), an AI-powered browser automation framework, with the Swarms multi-agent framework.

## Overview

Stagehand provides natural language browser automation capabilities that can be seamlessly integrated into Swarms agents. This integration enables:

- 🌐 **Natural Language Web Automation**: Use simple commands like "click the submit button" or "extract product prices"
- 🤖 **Multi-Agent Browser Workflows**: Multiple agents can automate different websites simultaneously
- 🔧 **Flexible Integration Options**: Use as a wrapped agent, individual tools, or via MCP server
- 📊 **Complex Automation Scenarios**: E-commerce monitoring, competitive analysis, automated testing, and more

## Examples

### 1. Stagehand Wrapper Agent (`1_stagehand_wrapper_agent.py`)

The simplest integration - wraps Stagehand as a Swarms-compatible agent.

```python
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

# Create a browser automation agent
browser_agent = StagehandAgent(
    agent_name="WebScraperAgent",
    model_name="gpt-4o-mini",
    env="LOCAL",  # or "BROWSERBASE" for cloud execution
)

# Use natural language to control the browser
result = browser_agent.run(
    "Navigate to news.ycombinator.com and extract the top 5 story titles"
)
```

**Features:**
- Inherits from Swarms `Agent` base class
- Automatic browser lifecycle management
- Natural language task interpretation
- Support for both local (Playwright) and cloud (Browserbase) execution

### 2. Stagehand as Tools (`2_stagehand_tools_agent.py`)

Provides fine-grained control by exposing Stagehand methods as individual tools.
```python
from swarms import Agent
from examples.stagehand.stagehand_tools_agent import (
    navigate_browser,
    browser_act,
    browser_extract,
    browser_observe,
    browser_screenshot,
    close_browser,
)

# Create agent with browser tools
browser_agent = Agent(
    agent_name="BrowserAutomationAgent",
    model_name="gpt-4o-mini",
    tools=[
        navigate_browser,
        browser_act,
        browser_extract,
        browser_observe,
        browser_screenshot,
        close_browser,
    ],
)

# Agent can now use tools strategically
result = browser_agent.run(
    "Go to google.com, search for 'Python tutorials', and extract the first 3 results"
)
```

**Available Tools:**
- `navigate_browser`: Navigate to URLs
- `browser_act`: Perform actions (click, type, scroll)
- `browser_extract`: Extract data from pages
- `browser_observe`: Find elements on pages
- `browser_screenshot`: Capture screenshots
- `close_browser`: Clean up browser resources

### 3. Stagehand MCP Server (`3_stagehand_mcp_agent.py`)

Integrates with Stagehand's Model Context Protocol (MCP) server for standardized tool access.

```python
from examples.stagehand.stagehand_mcp_agent import StagehandMCPAgent

# Connect to Stagehand MCP server
mcp_agent = StagehandMCPAgent(
    agent_name="WebResearchAgent",
    mcp_server_url="http://localhost:3000/sse",
)

# Use MCP tools including multi-session management
result = mcp_agent.run("""
    Create 3 browser sessions and:
    1. Session 1: Check Python.org for latest version
    2. Session 2: Check PyPI for trending packages
    3. Session 3: Check GitHub Python trending repos
    Compile a Python ecosystem status report.
""")
```

**MCP Features:**
- Automatic tool discovery
- Multi-session browser management
- Built-in screenshot resources
- Prompt templates for common tasks

### 4. Multi-Agent Workflows (`4_stagehand_multi_agent_workflow.py`)

Demonstrates complex multi-agent browser automation scenarios.

```python
from examples.stagehand.stagehand_multi_agent_workflow import (
    create_price_comparison_workflow,
    create_competitive_analysis_workflow,
    create_automated_testing_workflow,
    create_news_aggregation_workflow,
)

# Price comparison across multiple e-commerce sites
price_workflow = create_price_comparison_workflow()
result = price_workflow.run(
    "Compare prices for iPhone 15 Pro on Amazon and eBay"
)

# Competitive analysis of multiple companies
competitive_workflow = create_competitive_analysis_workflow()
result = competitive_workflow.run(
    "Analyze OpenAI, Anthropic, and DeepMind websites and social media"
)
```

**Workflow Examples:**
- **E-commerce Monitoring**: Track prices across multiple sites
- **Competitive Analysis**: Research competitors' websites and social media
- **Automated Testing**: UI, form validation, and accessibility testing
- **News Aggregation**: Collect and analyze news from multiple sources

## Setup

### Prerequisites

1. **Install Swarms and Stagehand:**
   ```bash
   pip install swarms stagehand

   # For LOCAL mode, also install the Playwright browser binaries
   playwright install
   ```

2. **Set up environment variables:**
   ```bash
   # For local browser automation (using Playwright)
   export OPENAI_API_KEY="your-openai-key"

   # For cloud browser automation (using Browserbase)
   export BROWSERBASE_API_KEY="your-browserbase-key"
   export BROWSERBASE_PROJECT_ID="your-project-id"
   ```

3. **For MCP Server examples:**
   ```bash
   # Install and run the Stagehand MCP server
   cd stagehand-mcp-server
   npm install
   npm run build
   npm start
   ```

## Use Cases

### E-commerce Automation
- Price monitoring and comparison
- Inventory tracking
- Automated purchasing workflows
- Review aggregation

### Research and Analysis
- Competitive intelligence gathering
- Market research automation
- Social media monitoring
- News and trend analysis

### Quality Assurance
- Automated UI testing
- Cross-browser compatibility testing
- Form validation testing
- Accessibility compliance checking

### Data Collection
- Web scraping at scale
- Real-time data monitoring
- Structured data extraction
- Screenshot documentation

## Best Practices

1. **Resource Management**: Always clean up browser instances when done
   ```python
   browser_agent.cleanup()  # For wrapper agents
   ```

2. **Error Handling**: Stagehand includes self-healing capabilities, but wrap critical operations in try-except blocks

3. **Parallel Execution**: Use `ConcurrentWorkflow` for simultaneous browser automation across multiple sites

4. **Session Management**: For complex multi-page workflows, use the MCP server's session management capabilities

5. **Rate Limiting**: Be respectful of websites - add delays between requests when necessary (see the sketch below)

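A minimal sketch of the rate-limiting advice above, using the wrapper agent from `1_stagehand_wrapper_agent.py`; the two-second delay and the task list are illustrative only:

```python
import time

from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

agent = StagehandAgent(agent_name="PoliteScraper", env="LOCAL")

tasks = [
    "Navigate to https://example.com and extract the main heading",
    "Navigate to https://example.org and extract the main heading",
]

for task in tasks:
    print(agent.run(task))
    time.sleep(2)  # illustrative pause between requests; tune per site policy

agent.cleanup()  # always release the browser when done
```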

## Testing

Run the test suite to verify the integration:

```bash
pytest tests/stagehand/test_stagehand_integration.py -v
```

## Troubleshooting

### Common Issues

1. **Browser not starting**: Ensure Playwright is properly installed
   ```bash
   playwright install
   ```

2. **MCP connection failed**: Verify the MCP server is running on the correct port

3. **Timeout errors**: Increase timeout in StagehandConfig or agent initialization

### Debug Mode

Enable verbose logging:
```python
agent = StagehandAgent(
    agent_name="DebugAgent",
    verbose=True,  # Enable detailed logging
)
```

## Contributing

We welcome contributions! Please:
1. Follow the existing code style
2. Add tests for new features
3. Update documentation
4. Submit PRs with clear descriptions

## License

These examples are provided under the same license as the Swarms framework. Stagehand is licensed separately - see [Stagehand's repository](https://github.com/browserbase/stagehand) for details.

@@ -0,0 +1,13 @@

# Requirements for Stagehand integration examples
swarms>=8.0.0
stagehand>=0.1.0
python-dotenv>=1.0.0
pydantic>=2.0.0
loguru>=0.7.0

# For MCP server examples (optional)
httpx>=0.24.0

# For testing
pytest>=7.0.0
pytest-asyncio>=0.21.0

@ -0,0 +1,436 @@
|
||||
"""
|
||||
Tests for Stagehand Integration with Swarms
|
||||
==========================================
|
||||
|
||||
This module contains tests for the Stagehand browser automation
|
||||
integration with the Swarms framework.
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
|
||||
# Mock Stagehand classes
|
||||
class MockObserveResult:
|
||||
def __init__(self, description, selector, method="click"):
|
||||
self.description = description
|
||||
self.selector = selector
|
||||
self.method = method
|
||||
|
||||
|
||||
class MockStagehandPage:
|
||||
async def goto(self, url):
|
||||
return None
|
||||
|
||||
async def act(self, action):
|
||||
return f"Performed action: {action}"
|
||||
|
||||
async def extract(self, query):
|
||||
return {"extracted": query, "data": ["item1", "item2"]}
|
||||
|
||||
async def observe(self, query):
|
||||
return [
|
||||
MockObserveResult("Search box", "#search-input"),
|
||||
MockObserveResult("Submit button", "#submit-btn"),
|
||||
]
|
||||
|
||||
|
||||
class MockStagehand:
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.page = MockStagehandPage()
|
||||
|
||||
async def init(self):
|
||||
pass
|
||||
|
||||
async def close(self):
|
||||
pass


# Test StagehandAgent wrapper
class TestStagehandAgent:
    """Test the StagehandAgent wrapper class."""

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_agent_initialization(self):
        """Test that StagehandAgent initializes correctly."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        assert agent.agent_name == "TestAgent"
        assert agent.stagehand_config.env == "LOCAL"
        assert agent.stagehand_config.model_name == "gpt-4o-mini"
        assert not agent._initialized

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_navigation_task(self):
        """Test navigation and extraction task."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run(
            "Navigate to example.com and extract the main content"
        )

        # Parse result
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert "navigated_to" in result_data["data"]
        assert (
            result_data["data"]["navigated_to"]
            == "https://example.com"
        )
        assert "extracted" in result_data["data"]

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_search_task(self):
        """Test search functionality."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run(
            "Go to google.com and search for 'test query'"
        )

        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert result_data["data"]["search_query"] == "test query"
        assert result_data["action"] == "search"

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_cleanup(self):
        """Test that cleanup properly closes the browser."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Initialize the agent
        agent.run("Navigate to example.com")
        assert agent._initialized

        # Cleanup
        agent.cleanup()

        # After cleanup, should be able to run again
        result = agent.run("Navigate to example.com")
        assert result is not None


# Test Stagehand Tools
# Note: these tests are async, so they carry pytest.mark.asyncio, matching
# the integration test below and the pytest-asyncio pin in requirements.txt.
class TestStagehandTools:
    """Test individual Stagehand tools."""

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_navigate_tool(self, mock_browser_state):
        """Test NavigateTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            NavigateTool,
        )

        # Setup mock
        mock_page = AsyncMock()
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = NavigateTool()
        result = await tool._async_run("https://example.com")

        assert (
            "Successfully navigated to https://example.com" in result
        )
        mock_page.goto.assert_called_once_with("https://example.com")

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_act_tool(self, mock_browser_state):
        """Test ActTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ActTool

        # Setup mock
        mock_page = AsyncMock()
        mock_page.act = AsyncMock(return_value="Action completed")
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ActTool()
        result = await tool._async_run("click the button")

        assert "Action performed" in result
        assert "click the button" in result
        mock_page.act.assert_called_once_with("click the button")

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_extract_tool(self, mock_browser_state):
        """Test ExtractTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            ExtractTool,
        )

        # Setup mock
        mock_page = AsyncMock()
        mock_page.extract = AsyncMock(
            return_value={
                "title": "Test Page",
                "content": "Test content",
            }
        )
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ExtractTool()
        result = await tool._async_run("extract the page title")

        # Result should be a JSON string
        parsed_result = json.loads(result)
        assert parsed_result["title"] == "Test Page"
        assert parsed_result["content"] == "Test content"

    @pytest.mark.asyncio
    @patch("examples.stagehand.stagehand_tools_agent.browser_state")
    async def test_observe_tool(self, mock_browser_state):
        """Test ObserveTool functionality."""
        from examples.stagehand.stagehand_tools_agent import (
            ObserveTool,
        )

        # Setup mock
        mock_page = AsyncMock()
        mock_observations = [
            MockObserveResult("Search input", "#search"),
            MockObserveResult("Submit button", "#submit"),
        ]
        mock_page.observe = AsyncMock(return_value=mock_observations)
        mock_browser_state.get_page = AsyncMock(
            return_value=mock_page
        )
        mock_browser_state.init_browser = AsyncMock()

        tool = ObserveTool()
        result = await tool._async_run("find the search box")

        # Result should be a JSON string
        parsed_result = json.loads(result)
        assert len(parsed_result) == 2
        assert parsed_result[0]["description"] == "Search input"
        assert parsed_result[0]["selector"] == "#search"


# Test MCP integration
class TestStagehandMCP:
    """Test Stagehand MCP server integration."""

    def test_mcp_agent_initialization(self):
        """Test that MCP agent initializes with correct parameters."""
        from examples.stagehand.stagehand_mcp_agent import (
            StagehandMCPAgent,
        )

        mcp_agent = StagehandMCPAgent(
            agent_name="TestMCPAgent",
            mcp_server_url="http://localhost:3000/sse",
            model_name="gpt-4o-mini",
        )

        assert mcp_agent.agent.agent_name == "TestMCPAgent"
        assert mcp_agent.agent.mcp_url == "http://localhost:3000/sse"
        assert mcp_agent.agent.model_name == "gpt-4o-mini"

    def test_multi_session_swarm_creation(self):
        """Test multi-session browser swarm creation."""
        from examples.stagehand.stagehand_mcp_agent import (
            MultiSessionBrowserSwarm,
        )

        swarm = MultiSessionBrowserSwarm(
            mcp_server_url="http://localhost:3000/sse",
            num_agents=3,
        )

        assert len(swarm.agents) == 3
        assert swarm.agents[0].agent_name == "DataExtractor_0"
        assert swarm.agents[1].agent_name == "FormFiller_1"
        assert swarm.agents[2].agent_name == "WebMonitor_2"

    @patch("swarms.Agent.run")
    def test_task_distribution(self, mock_run):
        """Test task distribution among swarm agents."""
        from examples.stagehand.stagehand_mcp_agent import (
            MultiSessionBrowserSwarm,
        )

        mock_run.return_value = "Task completed"

        swarm = MultiSessionBrowserSwarm(num_agents=2)
        tasks = ["Task 1", "Task 2", "Task 3"]

        results = swarm.distribute_tasks(tasks)

        assert len(results) == 3
        assert all(result == "Task completed" for result in results)
        assert mock_run.call_count == 3
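

# Editorial sketch (hypothetical; the real dispatcher lives in
# stagehand_mcp_agent.py): the task-distribution test above is consistent
# with a simple round-robin fan-out along these lines.
def _round_robin_distribute(agents, tasks):
    # With 2 agents and 3 tasks this yields 3 results and 3 run() calls,
    # matching the assertions in test_task_distribution.
    return [agents[i % len(agents)].run(task) for i, task in enumerate(tasks)]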


# Test multi-agent workflows
class TestMultiAgentWorkflows:
    """Test multi-agent workflow configurations."""

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_price_comparison_workflow_creation(self):
        """Test creation of price comparison workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_price_comparison_workflow,
        )

        workflow = create_price_comparison_workflow()

        # Should be a SequentialWorkflow with 2 agents
        assert len(workflow.agents) == 2
        # First agent should be a ConcurrentWorkflow
        assert hasattr(workflow.agents[0], "agents")
        # Second agent should be the analysis agent
        assert workflow.agents[1].agent_name == "PriceAnalysisAgent"

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_competitive_analysis_workflow_creation(self):
        """Test creation of competitive analysis workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_competitive_analysis_workflow,
        )

        workflow = create_competitive_analysis_workflow()

        # Should have 3 agents in the rearrange pattern
        assert len(workflow.agents) == 3
        assert (
            workflow.flow
            == "company_researcher -> social_media_agent -> report_compiler"
        )

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_automated_testing_workflow_creation(self):
        """Test creation of automated testing workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_automated_testing_workflow,
        )

        workflow = create_automated_testing_workflow()

        # Should be a SequentialWorkflow
        assert len(workflow.agents) == 2
        # First stage should be concurrent testing
        assert hasattr(workflow.agents[0], "agents")
        assert (
            len(workflow.agents[0].agents) == 3
        )  # UI, Form, Accessibility testers

    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    def test_news_aggregation_workflow_creation(self):
        """Test creation of news aggregation workflow."""
        from examples.stagehand.stagehand_multi_agent_workflow import (
            create_news_aggregation_workflow,
        )

        workflow = create_news_aggregation_workflow()

        # Should be a SequentialWorkflow with 3 stages
        assert len(workflow.agents) == 3
        # First stage should be concurrent scrapers
        assert hasattr(workflow.agents[0], "agents")
        assert len(workflow.agents[0].agents) == 3  # 3 news sources
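

# Editorial sketch (hypothetical construction; the real factories live in
# stagehand_multi_agent_workflow.py): the shape assertions above correspond
# to nesting a ConcurrentWorkflow as the first stage of a SequentialWorkflow,
# roughly:
#
#     scrapers = ConcurrentWorkflow(agents=[scraper_a, scraper_b, scraper_c])
#     workflow = SequentialWorkflow(agents=[scrapers, analysis_agent])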


# Integration tests
class TestIntegration:
    """End-to-end integration tests."""

    @pytest.mark.asyncio
    @patch(
        "examples.stagehand.stagehand_wrapper_agent.Stagehand",
        MockStagehand,
    )
    async def test_full_browser_automation_flow(self):
        """Test a complete browser automation flow."""
        from examples.stagehand.stagehand_wrapper_agent import (
            StagehandAgent,
        )

        agent = StagehandAgent(
            agent_name="IntegrationTestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Test navigation
        nav_result = agent.run("Navigate to example.com")
        assert "navigated_to" in nav_result

        # Test extraction
        extract_result = agent.run("Extract all text from the page")
        assert "extracted" in extract_result

        # Test observation
        observe_result = agent.run("Find all buttons on the page")
        assert "observation" in observe_result

        # Cleanup
        agent.cleanup()


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
@ -0,0 +1,302 @@
"""
Simple tests for Stagehand Integration with Swarms
==================================================

These tests verify the basic structure and functionality of the
Stagehand integration without requiring external dependencies.
"""

import json
import pytest
from unittest.mock import MagicMock


class TestStagehandIntegrationStructure:
    """Test that integration files have correct structure."""

    def test_examples_directory_exists(self):
        """Test that examples directory structure is correct."""
        import os

        base_path = "examples/stagehand"
        assert os.path.exists(base_path)

        expected_files = [
            "1_stagehand_wrapper_agent.py",
            "2_stagehand_tools_agent.py",
            "3_stagehand_mcp_agent.py",
            "4_stagehand_multi_agent_workflow.py",
            "README.md",
            "requirements.txt",
        ]

        for file in expected_files:
            file_path = os.path.join(base_path, file)
            assert os.path.exists(file_path), f"Missing file: {file}"

    def test_wrapper_agent_imports(self):
        """Test that wrapper agent has correct imports."""
        with open(
            "examples/stagehand/1_stagehand_wrapper_agent.py", "r"
        ) as f:
            content = f.read()

        # Check for required imports
        assert "from swarms import Agent" in content
        assert "import asyncio" in content
        assert "import json" in content
        assert "class StagehandAgent" in content

    def test_tools_agent_imports(self):
        """Test that tools agent has correct imports."""
        with open(
            "examples/stagehand/2_stagehand_tools_agent.py", "r"
        ) as f:
            content = f.read()

        # Check for required imports
        assert "from swarms import Agent" in content
        assert "def navigate_browser" in content
        assert "def browser_act" in content
        assert "def browser_extract" in content

    def test_mcp_agent_imports(self):
        """Test that MCP agent has correct imports."""
        with open(
            "examples/stagehand/3_stagehand_mcp_agent.py", "r"
        ) as f:
            content = f.read()

        # Check for required imports
        assert "from swarms import Agent" in content
        assert "class StagehandMCPAgent" in content
        assert "mcp_url" in content

    def test_workflow_agent_imports(self):
        """Test that workflow agent has correct imports."""
        with open(
            "examples/stagehand/4_stagehand_multi_agent_workflow.py",
            "r",
        ) as f:
            content = f.read()

        # Check for required imports
        assert (
            "from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow"
            in content
        )
        assert (
            "from swarms.structs.agent_rearrange import AgentRearrange"
            in content
        )


class TestStagehandMockIntegration:
    """Test Stagehand integration with mocked dependencies."""

    def test_mock_stagehand_initialization(self):
        """Test that Stagehand can be mocked and initialized."""

        # Setup mock without importing actual stagehand
        mock_stagehand = MagicMock()
        mock_instance = MagicMock()
        mock_instance.init = MagicMock()
        mock_stagehand.return_value = mock_instance

        # Mock config creation
        config = MagicMock()
        stagehand_instance = mock_stagehand(config)

        # Verify the mock works
        assert stagehand_instance is not None
        assert hasattr(stagehand_instance, "init")

    def test_json_serialization(self):
        """Test JSON serialization for agent responses."""

        # Test data that would come from browser automation
        test_data = {
            "task": "Navigate to example.com",
            "status": "completed",
            "data": {
                "navigated_to": "https://example.com",
                "extracted": ["item1", "item2"],
                "action": "navigate",
            },
        }

        # Test serialization
        json_result = json.dumps(test_data, indent=2)
        assert isinstance(json_result, str)

        # Test deserialization
        parsed_data = json.loads(json_result)
        assert parsed_data["task"] == "Navigate to example.com"
        assert parsed_data["status"] == "completed"
        assert len(parsed_data["data"]["extracted"]) == 2

    def test_url_extraction_logic(self):
        """Test URL extraction logic from task strings."""
        import re

        # Test cases
        test_cases = [
            (
                "Navigate to https://example.com",
                ["https://example.com"],
            ),
            ("Go to google.com and search", ["google.com"]),
            (
                "Visit https://github.com/repo",
                ["https://github.com/repo"],
            ),
            ("Open example.org", ["example.org"]),
        ]

        url_pattern = r"https?://[^\s]+"
        domain_pattern = r"(\w+\.\w+)"

        for task, expected in test_cases:
            # Extract full URLs
            urls = re.findall(url_pattern, task)

            # If no full URLs, extract domains
            if not urls:
                domains = re.findall(domain_pattern, task)
                if domains:
                    urls = domains

            assert (
                len(urls) > 0
            ), f"Failed to extract URL from: {task}"
            assert (
                urls[0] in expected
            ), f"Expected {expected}, got {urls}"


class TestSwarmsPatternsCompliance:
    """Test compliance with Swarms framework patterns."""

    def test_agent_inheritance_pattern(self):
        """Test that wrapper agent follows Swarms Agent inheritance pattern."""

        # Read the wrapper agent file
        with open(
            "examples/stagehand/1_stagehand_wrapper_agent.py", "r"
        ) as f:
            content = f.read()

        # Check inheritance pattern
        assert "class StagehandAgent(SwarmsAgent):" in content
        assert "def run(self, task: str" in content
        assert "return" in content

    def test_tools_pattern(self):
        """Test that tools follow Swarms function-based pattern."""

        # Read the tools agent file
        with open(
            "examples/stagehand/2_stagehand_tools_agent.py", "r"
        ) as f:
            content = f.read()

        # Check function-based tool pattern
        assert "def navigate_browser(url: str) -> str:" in content
        assert "def browser_act(action: str) -> str:" in content
        assert "def browser_extract(query: str) -> str:" in content
        assert "def browser_observe(query: str) -> str:" in content

    def test_mcp_integration_pattern(self):
        """Test MCP integration follows Swarms pattern."""

        # Read the MCP agent file
        with open(
            "examples/stagehand/3_stagehand_mcp_agent.py", "r"
        ) as f:
            content = f.read()

        # Check MCP pattern
        assert "mcp_url=" in content
        assert "Agent(" in content

    def test_workflow_patterns(self):
        """Test workflow patterns are properly used."""

        # Read the workflow file
        with open(
            "examples/stagehand/4_stagehand_multi_agent_workflow.py",
            "r",
        ) as f:
            content = f.read()

        # Check workflow patterns
        assert "SequentialWorkflow" in content
        assert "ConcurrentWorkflow" in content
        assert "AgentRearrange" in content


class TestDocumentationAndExamples:
    """Test documentation and example completeness."""

    def test_readme_completeness(self):
        """Test that README contains essential information."""

        with open("examples/stagehand/README.md", "r") as f:
            content = f.read()

        required_sections = [
            "# Stagehand Browser Automation Integration",
            "## Overview",
            "## Examples",
            "## Setup",
            "## Use Cases",
            "## Best Practices",
        ]

        for section in required_sections:
            assert section in content, f"Missing section: {section}"

    def test_requirements_file(self):
        """Test that requirements file has necessary dependencies."""

        with open("examples/stagehand/requirements.txt", "r") as f:
            content = f.read()

        required_deps = [
            "swarms",
            "stagehand",
            "python-dotenv",
            "pydantic",
            "loguru",
        ]

        for dep in required_deps:
            assert dep in content, f"Missing dependency: {dep}"

    def test_example_files_have_docstrings(self):
        """Test that example files have proper docstrings."""

        example_files = [
            "examples/stagehand/1_stagehand_wrapper_agent.py",
            "examples/stagehand/2_stagehand_tools_agent.py",
            "examples/stagehand/3_stagehand_mcp_agent.py",
            "examples/stagehand/4_stagehand_multi_agent_workflow.py",
        ]

        for file_path in example_files:
            with open(file_path, "r") as f:
                content = f.read()

            # Check for module docstring
            assert (
                '"""' in content[:500]
            ), f"Missing docstring in {file_path}"

            # Check for main execution block
            assert (
                'if __name__ == "__main__":' in content
            ), f"Missing main block in {file_path}"


if __name__ == "__main__":
    pytest.main([__file__, "-v"])