parent
daf0891611
commit
b04e60ca17
@ -0,0 +1,257 @@
|
|||||||
|
"""
|
||||||
|
Stagehand Browser Automation Agent for Swarms
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
This example demonstrates how to create a Swarms-compatible agent
|
||||||
|
that wraps Stagehand's browser automation capabilities.
|
||||||
|
|
||||||
|
The StagehandAgent class inherits from the Swarms Agent base class
|
||||||
|
and implements browser automation through natural language commands.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from swarms import Agent as SwarmsAgent
|
||||||
|
from stagehand import Stagehand, StagehandConfig
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class WebData(BaseModel):
    """Schema for extracted web data.

    Canonical record shape for content pulled out of a page by the
    browser agent; ``metadata`` is a free-form bag for anything extra.
    """

    # Fields marked ``...`` are required by pydantic.
    url: str = Field(..., description="The URL of the page")
    title: str = Field(..., description="Page title")
    content: str = Field(..., description="Extracted content")
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class StagehandAgent(SwarmsAgent):
    """
    A Swarms agent that integrates Stagehand for browser automation.

    This agent can navigate websites, extract data, perform actions,
    and observe page elements using natural language instructions.
    """

    def __init__(
        self,
        agent_name: str = "StagehandBrowserAgent",
        browserbase_api_key: Optional[str] = None,
        browserbase_project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
        env: str = "LOCAL",  # LOCAL or BROWSERBASE
        *args,
        **kwargs,
    ):
        """
        Initialize the StagehandAgent.

        Args:
            agent_name: Name of the agent.
            browserbase_api_key: API key for Browserbase (if using cloud).
            browserbase_project_id: Project ID for Browserbase.
            model_name: LLM model to use.
            model_api_key: API key for the model.
            env: Environment - LOCAL or BROWSERBASE.
        """
        # Stagehand-specific arguments are consumed here; everything else
        # is forwarded untouched to the Swarms Agent base class.
        super().__init__(agent_name=agent_name, *args, **kwargs)

        self.stagehand_config = StagehandConfig(
            env=env,
            api_key=browserbase_api_key
            or os.getenv("BROWSERBASE_API_KEY"),
            project_id=browserbase_project_id
            or os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name=model_name,
            model_api_key=model_api_key or os.getenv("OPENAI_API_KEY"),
        )
        # Lazily created on first run(); see _init_stagehand().
        self.stagehand = None
        self._initialized = False

    async def _init_stagehand(self):
        """Initialize the Stagehand instance on first use (idempotent)."""
        if not self._initialized:
            self.stagehand = Stagehand(self.stagehand_config)
            await self.stagehand.init()
            self._initialized = True
            logger.info(f"Stagehand initialized for {self.agent_name}")

    async def _close_stagehand(self):
        """Close the Stagehand instance if it is open (idempotent)."""
        if self.stagehand and self._initialized:
            await self.stagehand.close()
            self._initialized = False
            logger.info(f"Stagehand closed for {self.agent_name}")

    def run(self, task: str, *args, **kwargs) -> str:
        """
        Execute a browser automation task.

        The task string should contain instructions like:
        - "Navigate to example.com and extract the main content"
        - "Go to google.com and search for 'AI agents'"
        - "Extract all company names from https://ycombinator.com"

        Args:
            task: Natural language description of the browser task.

        Returns:
            String result of the task execution (JSON on success, an
            error message on failure).
        """
        return asyncio.run(self._async_run(task, *args, **kwargs))

    async def _async_run(
        self, task: str, *args, **kwargs
    ) -> str:
        """Async implementation of run(); never raises, returns a string."""
        try:
            await self._init_stagehand()
            result = await self._execute_browser_task(task)
            # default=str keeps json.dumps from raising on non-JSON-native
            # objects that page.extract() may return.
            return json.dumps(result, indent=2, default=str)
        except Exception as e:
            logger.error(f"Error in browser task: {str(e)}")
            return f"Error executing browser task: {str(e)}"
        # NOTE: the browser is intentionally kept open for follow-up
        # tasks; call cleanup() explicitly when finished.

    async def _execute_browser_task(
        self, task: str
    ) -> Dict[str, Any]:
        """
        Execute a browser task based on natural language instructions.

        Simple keyword heuristics decide which Stagehand primitive
        (goto / extract / act / observe) is invoked.

        Args:
            task: Natural language description of the browser task.

        Returns:
            Dict with keys ``task``, ``status``, ``data`` and (usually)
            ``action``.
        """
        import re

        page = self.stagehand.page
        result: Dict[str, Any] = {
            "task": task,
            "status": "completed",
            "data": {},
        }

        # --- Navigation: runs before (and in addition to) any action ---
        if any(
            keyword in task.lower()
            for keyword in ["navigate", "go to", "visit", "open"]
        ):
            # Prefer explicit URLs; otherwise promote bare domains
            # ("example.com") to https URLs.
            urls = re.findall(r"https?://[^\s]+", task)
            if not urls and any(
                domain in task for domain in [".com", ".org", ".net"]
            ):
                domains = re.findall(r"(\w+\.\w+)", task)
                if domains:
                    urls = [f"https://{domain}" for domain in domains]

            if urls:
                url = urls[0]
                await page.goto(url)
                result["data"]["navigated_to"] = url
                logger.info(f"Navigated to {url}")

        # --- Action dispatch by keyword --------------------------------
        if "extract" in task.lower():
            # Case-insensitive removal: the guard above lowercases the
            # task, so a case-sensitive replace would miss "Extract ...".
            extraction_prompt = re.sub(
                "extract", "", task, flags=re.IGNORECASE
            ).strip()
            extracted = await page.extract(extraction_prompt)
            result["data"]["extracted"] = extracted
            result["action"] = "extract"

        elif "click" in task.lower() or "press" in task.lower():
            action_result = await page.act(task)
            result["data"]["action_performed"] = str(action_result)
            result["action"] = "act"

        elif "search" in task.lower():
            search_query = (
                task.split("search for")[-1].strip().strip("'\"")
            )
            # First locate the search box, then click, type, and submit.
            search_box = await page.observe("find the search input field")
            if search_box:
                # Use the observed element's human-readable description,
                # not str() of the ObserveResult object itself.
                await page.act(f"click on {search_box[0].description}")
                await page.act(f"type '{search_query}'")
                await page.act("press Enter")
            result["data"]["search_query"] = search_query
            result["action"] = "search"

        elif "observe" in task.lower() or "find" in task.lower():
            observation = await page.observe(task)
            result["data"]["observation"] = [
                {"description": obs.description, "selector": obs.selector}
                for obs in observation
            ]
            result["action"] = "observe"

        else:
            # No recognized keyword: hand the whole task to act().
            action_result = await page.act(task)
            result["data"]["action_result"] = str(action_result)
            result["action"] = "general"

        return result

    def cleanup(self):
        """Clean up browser resources (safe to call multiple times)."""
        if self._initialized:
            asyncio.run(self._close_stagehand())

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            # Never raise from __del__ — e.g. asyncio.run() may fail
            # during interpreter shutdown.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    # Instantiate the browser agent (LOCAL = Playwright,
    # BROWSERBASE = cloud browsers).
    browser_agent = StagehandAgent(
        agent_name="WebScraperAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    separator = "\n" + "=" * 50 + "\n"

    # Example 1: Navigate and extract data
    print("Example 1: Basic navigation and extraction")
    print(
        browser_agent.run(
            "Navigate to https://news.ycombinator.com and extract the titles of the top 5 stories"
        )
    )
    print(separator)

    # Example 2: Perform a search
    print("Example 2: Search on a website")
    print(
        browser_agent.run(
            "Go to google.com and search for 'Swarms AI framework'"
        )
    )
    print(separator)

    # Example 3: Extract structured data
    print("Example 3: Extract specific information")
    print(
        browser_agent.run(
            "Navigate to https://example.com and extract the main heading and first paragraph"
        )
    )

    # Clean up
    browser_agent.cleanup()
|
@ -0,0 +1,332 @@
|
|||||||
|
"""
|
||||||
|
Stagehand Tools for Swarms Agent
|
||||||
|
=================================
|
||||||
|
|
||||||
|
This example demonstrates how to create Stagehand browser automation tools
|
||||||
|
that can be used by a standard Swarms Agent. Each Stagehand method (act,
|
||||||
|
extract, observe) becomes a separate tool that the agent can use.
|
||||||
|
|
||||||
|
This approach gives the agent more fine-grained control over browser
|
||||||
|
automation tasks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from swarms import Agent
|
||||||
|
from swarms.tools.base_tool import BaseTool
|
||||||
|
from stagehand import Stagehand, StagehandConfig
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserState:
    """Singleton to manage browser state across tools."""

    _instance = None
    _stagehand = None
    _initialized = False

    def __new__(cls):
        # Classic singleton: every instantiation returns the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def init_browser(
        self,
        env: str = "LOCAL",
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        model_name: str = "gpt-4o-mini",
        model_api_key: Optional[str] = None,
    ):
        """Initialize the browser if not already initialized."""
        if self._initialized:
            return

        self._stagehand = Stagehand(
            StagehandConfig(
                env=env,
                api_key=api_key or os.getenv("BROWSERBASE_API_KEY"),
                project_id=project_id or os.getenv("BROWSERBASE_PROJECT_ID"),
                model_name=model_name,
                model_api_key=model_api_key or os.getenv("OPENAI_API_KEY"),
            )
        )
        await self._stagehand.init()
        self._initialized = True
        logger.info("Stagehand browser initialized")

    async def get_page(self):
        """Get the current page instance."""
        if not self._initialized:
            raise RuntimeError("Browser not initialized. Call init_browser first.")
        return self._stagehand.page

    async def close(self):
        """Close the browser."""
        if not (self._initialized and self._stagehand):
            return
        await self._stagehand.close()
        self._initialized = False
        logger.info("Stagehand browser closed")


# Module-wide shared browser state used by every tool below.
browser_state = BrowserState()
|
||||||
|
|
||||||
|
|
||||||
|
class NavigateTool(BaseTool):
    """Tool for navigating to URLs in the browser."""

    def __init__(self):
        super().__init__(
            name="navigate_browser",
            description="Navigate to a URL in the browser. Input should be a valid URL starting with http:// or https://",
            verbose=True,
        )

    def run(self, url: str) -> str:
        """Navigate to the specified URL (sync wrapper)."""
        return asyncio.run(self._async_run(url))

    async def _async_run(self, url: str) -> str:
        """Open the URL in the shared browser session."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            # Default to HTTPS when the caller omitted the scheme.
            if not url.startswith(("http://", "https://")):
                url = f"https://{url}"

            await page.goto(url)
        except Exception as e:
            logger.error(f"Navigation error: {str(e)}")
            return f"Failed to navigate to {url}: {str(e)}"
        return f"Successfully navigated to {url}"
|
||||||
|
|
||||||
|
|
||||||
|
class ActTool(BaseTool):
    """Tool for performing actions on web pages."""

    def __init__(self):
        super().__init__(
            name="browser_act",
            description=(
                "Perform an action on the current web page using natural language. "
                "Examples: 'click the submit button', 'type hello@example.com in the email field', "
                "'scroll down', 'press Enter'"
            ),
            verbose=True,
        )

    def run(self, action: str) -> str:
        """Perform the specified action (sync wrapper)."""
        return asyncio.run(self._async_run(action))

    async def _async_run(self, action: str) -> str:
        """Hand the natural-language action to Stagehand's act()."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()
            outcome = await page.act(action)
        except Exception as e:
            logger.error(f"Action error: {str(e)}")
            return f"Failed to perform action '{action}': {str(e)}"
        return f"Action performed: {action}. Result: {outcome}"
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractTool(BaseTool):
    """Tool for extracting data from web pages."""

    def __init__(self):
        super().__init__(
            name="browser_extract",
            description=(
                "Extract information from the current web page using natural language. "
                "Examples: 'extract all email addresses', 'get the main article text', "
                "'find all product prices', 'extract the page title and meta description'"
            ),
            verbose=True,
        )

    def run(self, query: str) -> str:
        """Extract information based on the query (sync wrapper)."""
        return asyncio.run(self._async_run(query))

    async def _async_run(self, query: str) -> str:
        """Run Stagehand extract() and serialize the result for the agent."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            extracted = await page.extract(query)

            # Dicts/lists are rendered as JSON; anything else is stringified.
            return (
                json.dumps(extracted, indent=2)
                if isinstance(extracted, (dict, list))
                else str(extracted)
            )
        except Exception as e:
            logger.error(f"Extraction error: {str(e)}")
            return f"Failed to extract '{query}': {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
class ObserveTool(BaseTool):
    """Tool for observing elements on web pages."""

    def __init__(self):
        super().__init__(
            name="browser_observe",
            description=(
                "Observe and find elements on the current web page using natural language. "
                "Returns information about elements including their selectors. "
                "Examples: 'find the search box', 'locate the submit button', "
                "'find all navigation links'"
            ),
            verbose=True,
        )

    def run(self, query: str) -> str:
        """Observe elements based on the query (sync wrapper)."""
        return asyncio.run(self._async_run(query))

    async def _async_run(self, query: str) -> str:
        """Run Stagehand observe() and return the findings as JSON."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            observations = await page.observe(query)

            # Flatten each ObserveResult into a plain JSON-friendly dict.
            findings = [
                {
                    "description": obs.description,
                    "selector": obs.selector,
                    "method": obs.method,
                }
                for obs in observations
            ]
            return json.dumps(findings, indent=2)
        except Exception as e:
            logger.error(f"Observation error: {str(e)}")
            return f"Failed to observe '{query}': {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
class ScreenshotTool(BaseTool):
    """Tool for taking screenshots of the current page."""

    def __init__(self):
        super().__init__(
            name="browser_screenshot",
            description="Take a screenshot of the current web page. Optionally provide a filename.",
            verbose=True,
        )

    def run(self, filename: str = "screenshot.png") -> str:
        """Take a screenshot (sync wrapper).

        Args:
            filename: Output path; ".png" is appended if missing.

        Returns:
            Success message with the saved path, or an error message.
        """
        return asyncio.run(self._async_run(filename))

    async def _async_run(self, filename: str) -> str:
        """Capture the current page to ``filename`` via Playwright."""
        try:
            await browser_state.init_browser()
            page = await browser_state.get_page()

            # Ensure .png extension
            if not filename.endswith(".png"):
                filename += ".png"

            # Stagehand wraps Playwright; screenshot() lives on the
            # underlying Playwright page object.
            playwright_page = page.page
            await playwright_page.screenshot(path=filename)

            # BUG FIX: previously returned the literal "(unknown)" instead
            # of the actual filename.
            return f"Screenshot saved to {filename}"
        except Exception as e:
            logger.error(f"Screenshot error: {str(e)}")
            return f"Failed to take screenshot: {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
class CloseBrowserTool(BaseTool):
    """Tool for closing the browser."""

    def __init__(self):
        super().__init__(
            name="close_browser",
            description="Close the browser when done with automation tasks",
            verbose=True,
        )

    def run(self, *args) -> str:
        """Close the browser (sync wrapper; ignores any arguments)."""
        return asyncio.run(self._async_run())

    async def _async_run(self) -> str:
        """Tear down the shared browser session."""
        try:
            await browser_state.close()
        except Exception as e:
            logger.error(f"Close browser error: {str(e)}")
            return f"Failed to close browser: {str(e)}"
        return "Browser closed successfully"
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    # One tool instance per browser capability.
    navigate_tool = NavigateTool()
    act_tool = ActTool()
    extract_tool = ExtractTool()
    observe_tool = ObserveTool()
    screenshot_tool = ScreenshotTool()
    close_browser_tool = CloseBrowserTool()

    # Wire the tools into a standard Swarms agent.
    browser_agent = Agent(
        agent_name="BrowserAutomationAgent",
        model_name="gpt-4o-mini",
        max_loops=1,
        tools=[
            navigate_tool,
            act_tool,
            extract_tool,
            observe_tool,
            screenshot_tool,
            close_browser_tool,
        ],
        system_prompt="""You are a web browser automation specialist. You can:
1. Navigate to websites using the navigate_browser tool
2. Perform actions like clicking and typing using the browser_act tool
3. Extract information from pages using the browser_extract tool
4. Find and observe elements using the browser_observe tool
5. Take screenshots using the browser_screenshot tool
6. Close the browser when done using the close_browser tool

Always start by navigating to a URL before trying to interact with a page.
Be specific in your actions and extractions. When done with tasks, close the browser.""",
    )

    separator = "\n" + "=" * 50 + "\n"

    # Example 1: Research task
    print("Example 1: Automated web research")
    print(
        browser_agent.run(
            "Go to hackernews (news.ycombinator.com) and extract the titles of the top 5 stories. Then take a screenshot."
        )
    )
    print(separator)

    # Example 2: Search task
    print("Example 2: Perform a web search")
    print(
        browser_agent.run(
            "Navigate to google.com, search for 'Python web scraping best practices', and extract the first 3 search result titles"
        )
    )
    print(separator)

    # Example 3: Form interaction
    print("Example 3: Interact with a form")
    print(
        browser_agent.run(
            "Go to example.com and observe what elements are on the page. Then extract all the text content."
        )
    )

    # Clean up
    browser_agent.run("Close the browser")
|
@ -0,0 +1,252 @@
|
|||||||
|
"""
|
||||||
|
Stagehand MCP Server Integration with Swarms
|
||||||
|
============================================
|
||||||
|
|
||||||
|
This example demonstrates how to use the Stagehand MCP (Model Context Protocol)
|
||||||
|
server with Swarms agents. The MCP server provides browser automation capabilities
|
||||||
|
as standardized tools that can be discovered and used by agents.
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
1. Install and run the Stagehand MCP server:
|
||||||
|
cd stagehand-mcp-server
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
npm start
|
||||||
|
|
||||||
|
2. The server will start on http://localhost:3000/sse
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Automatic tool discovery from MCP server
|
||||||
|
- Multi-session browser management
|
||||||
|
- Built-in screenshot resources
|
||||||
|
- Prompt templates for common tasks
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from swarms import Agent
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class StagehandMCPAgent:
    """
    A Swarms agent that connects to the Stagehand MCP server
    for browser automation capabilities.
    """

    def __init__(
        self,
        agent_name: str = "StagehandMCPAgent",
        mcp_server_url: str = "http://localhost:3000/sse",
        model_name: str = "gpt-4o-mini",
        max_loops: int = 1,
    ):
        """
        Initialize the Stagehand MCP Agent.

        Args:
            agent_name: Name of the agent.
            mcp_server_url: URL of the Stagehand MCP server.
            model_name: LLM model to use.
            max_loops: Maximum number of reasoning loops.
        """
        # The system prompt advertises the tool surface exposed by the
        # MCP server so the LLM knows what it can call.
        system_prompt = """You are a web browser automation specialist with access to Stagehand MCP tools.

Available tools from the MCP server:
- navigate: Navigate to a URL
- act: Perform actions on web pages (click, type, etc.)
- extract: Extract data from web pages
- observe: Find and observe elements on pages
- screenshot: Take screenshots
- createSession: Create new browser sessions for parallel tasks
- listSessions: List active browser sessions
- closeSession: Close browser sessions

For multi-page workflows, you can create multiple sessions.
Always be specific in your actions and extractions.
Remember to close sessions when done with them."""

        self.agent = Agent(
            agent_name=agent_name,
            model_name=model_name,
            max_loops=max_loops,
            # Tool discovery happens over the MCP connection.
            mcp_url=mcp_server_url,
            system_prompt=system_prompt,
            verbose=True,
        )

    def run(self, task: str) -> str:
        """Run a browser automation task."""
        return self.agent.run(task)
|
||||||
|
|
||||||
|
|
||||||
|
class MultiSessionBrowserSwarm:
    """
    A multi-agent swarm that uses multiple browser sessions
    for parallel web automation tasks.
    """

    def __init__(
        self,
        mcp_server_url: str = "http://localhost:3000/sse",
        num_agents: int = 3,
    ):
        """
        Initialize a swarm of browser automation agents.

        Args:
            mcp_server_url: URL of the Stagehand MCP server.
            num_agents: Number of agents to create. Silently capped at
                the number of available role specializations (3).
        """
        self.agents = []

        # Create specialized agents for different tasks
        agent_roles = [
            ("DataExtractor", "You specialize in extracting structured data from websites."),
            ("FormFiller", "You specialize in filling out forms and interacting with web applications."),
            ("WebMonitor", "You specialize in monitoring websites for changes and capturing screenshots."),
        ]

        for i in range(min(num_agents, len(agent_roles))):
            name, specialization = agent_roles[i]
            agent = Agent(
                agent_name=f"{name}_{i}",
                model_name="gpt-4o-mini",
                max_loops=1,
                mcp_url=mcp_server_url,
                system_prompt=f"""You are a web browser automation specialist. {specialization}

You have access to Stagehand MCP tools including:
- createSession: Create a new browser session
- navigate_session: Navigate to URLs in a specific session
- act_session: Perform actions in a specific session
- extract_session: Extract data from a specific session
- observe_session: Observe elements in a specific session
- closeSession: Close a session when done

Always create your own session for tasks to work independently from other agents.""",
                verbose=True,
            )
            self.agents.append(agent)

    def distribute_tasks(self, tasks: List[str]) -> List[str]:
        """Distribute tasks among agents round-robin.

        Args:
            tasks: Task descriptions to execute sequentially.

        Returns:
            Results in the same order as ``tasks``.

        Raises:
            RuntimeError: If the swarm has no agents (e.g. constructed
                with ``num_agents=0``) — previously this crashed with
                ZeroDivisionError on ``i % len(self.agents)``.
        """
        if not self.agents:
            raise RuntimeError("No agents available to execute tasks.")

        results = []

        # Round-robin assignment over the available agents.
        for i, task in enumerate(tasks):
            agent = self.agents[i % len(self.agents)]

            logger.info(f"Assigning task to {agent.agent_name}: {task}")
            results.append(agent.run(task))

        return results
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    banner = "=" * 70

    print(banner)
    print("Stagehand MCP Server Integration Examples")
    print(banner)
    print("\nMake sure the Stagehand MCP server is running on http://localhost:3000/sse")
    print("Run: cd stagehand-mcp-server && npm start\n")

    # Example 1: Single agent with MCP tools
    print("\nExample 1: Single Agent with MCP Tools")
    print("-" * 40)

    mcp_agent = StagehandMCPAgent(
        agent_name="WebResearchAgent",
        mcp_server_url="http://localhost:3000/sse",
    )

    # Research task using MCP tools
    result1 = mcp_agent.run(
        """Navigate to news.ycombinator.com and extract the following:
    1. The titles of the top 5 stories
    2. Their points/scores
    3. Number of comments for each
    Then take a screenshot of the page."""
    )
    print(f"Result: {result1}")

    print("\n" + banner + "\n")

    # Example 2: Multi-session parallel browsing
    print("Example 2: Multi-Session Parallel Browsing")
    print("-" * 40)

    parallel_agent = StagehandMCPAgent(
        agent_name="ParallelBrowserAgent",
        mcp_server_url="http://localhost:3000/sse",
    )

    result2 = parallel_agent.run(
        """Create 3 browser sessions and perform these tasks in parallel:
    1. Session 1: Go to github.com/trending and extract the top 3 trending repositories
    2. Session 2: Go to reddit.com/r/programming and extract the top 3 posts
    3. Session 3: Go to stackoverflow.com and extract the featured questions

    After extracting data from all sessions, close them."""
    )
    print(f"Result: {result2}")

    print("\n" + banner + "\n")

    # Example 3: Multi-agent browser swarm
    print("Example 3: Multi-Agent Browser Swarm")
    print("-" * 40)

    # Create a swarm of specialized browser agents
    browser_swarm = MultiSessionBrowserSwarm(
        mcp_server_url="http://localhost:3000/sse",
        num_agents=3,
    )

    # Define tasks for the swarm
    swarm_tasks = [
        "Create a session, navigate to python.org, and extract information about the latest Python version and its key features",
        "Create a session, go to npmjs.com, search for 'stagehand', and extract information about the package including version and description",
        "Create a session, visit playwright.dev, and extract the main features and benefits listed on the homepage",
    ]

    print("Distributing tasks to browser swarm...")
    swarm_results = browser_swarm.distribute_tasks(swarm_tasks)

    for i, result in enumerate(swarm_results):
        print(f"\nTask {i+1} Result: {result}")

    print("\n" + banner + "\n")

    # Example 4: Complex workflow with session management
    print("Example 4: Complex Multi-Page Workflow")
    print("-" * 40)

    workflow_agent = StagehandMCPAgent(
        agent_name="WorkflowAgent",
        mcp_server_url="http://localhost:3000/sse",
        max_loops=2,  # Allow more complex reasoning
    )

    result4 = workflow_agent.run(
        """Perform a comprehensive analysis of AI frameworks:
    1. Create a new session
    2. Navigate to github.com/huggingface/transformers and extract the star count and latest release info
    3. In the same session, navigate to github.com/openai/gpt-3 and extract similar information
    4. Navigate to github.com/anthropics/anthropic-sdk-python and extract repository statistics
    5. Take screenshots of each repository page
    6. Compile a comparison report of all three repositories
    7. Close the session when done"""
    )
    print(f"Result: {result4}")

    print("\n" + banner)
    print("All examples completed!")
    print(banner)
|
@ -0,0 +1,359 @@
|
|||||||
|
"""
|
||||||
|
Stagehand Multi-Agent Browser Automation Workflows
|
||||||
|
=================================================
|
||||||
|
|
||||||
|
This example demonstrates advanced multi-agent workflows using Stagehand
|
||||||
|
for complex browser automation scenarios. It shows how multiple agents
|
||||||
|
can work together to accomplish sophisticated web tasks.
|
||||||
|
|
||||||
|
Use cases:
|
||||||
|
1. E-commerce price monitoring across multiple sites
|
||||||
|
2. Competitive analysis and market research
|
||||||
|
3. Automated testing and validation workflows
|
||||||
|
4. Data aggregation from multiple sources
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from loguru import logger
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from swarms import Agent, SequentialWorkflow, ConcurrentWorkflow
|
||||||
|
from swarms.structs.agent_rearrange import AgentRearrange
|
||||||
|
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# Pydantic models for structured data
|
||||||
|
class ProductInfo(BaseModel):
    """Product information schema.

    Captures the attributes scraped for a single product listing, plus an
    optional path to a screenshot of the listing page.
    """

    name: str = Field(..., description="Product name")
    price: float = Field(..., description="Product price")
    availability: str = Field(..., description="Availability status")
    url: str = Field(..., description="Product URL")
    # Optional because not every scraping run captures a screenshot.
    screenshot_path: Optional[str] = Field(None, description="Screenshot file path")
|
||||||
|
|
||||||
|
|
||||||
|
class MarketAnalysis(BaseModel):
    """Market analysis report schema.

    Aggregates the products collected by the scraper agents together with a
    summary price range and free-form recommendations.
    """

    # Report creation time; naive local time via datetime.now —
    # TODO confirm whether downstream consumers expect UTC.
    timestamp: datetime = Field(default_factory=datetime.now)
    products: List[ProductInfo] = Field(..., description="List of products analyzed")
    # Presumably keys are "min"/"max" — verify against producers of this model.
    price_range: Dict[str, float] = Field(..., description="Min and max prices")
    recommendations: List[str] = Field(..., description="Analysis recommendations")
|
||||||
|
|
||||||
|
|
||||||
|
# Specialized browser agents
|
||||||
|
class ProductScraperAgent(StagehandAgent):
    """Specialized agent for scraping product information.

    Thin subclass of :class:`StagehandAgent` that derives its agent name from
    the target site, e.g. ``ProductScraper_Amazon``.
    """

    def __init__(self, site_name: str, *args, **kwargs):
        """
        Args:
            site_name: Name of the site this scraper targets; used to build
                the default ``agent_name``.
            *args, **kwargs: Forwarded to ``StagehandAgent.__init__``.
        """
        # setdefault lets an explicit agent_name passed by the caller win;
        # the original form super().__init__(agent_name=..., *args, **kwargs)
        # raised TypeError on any caller-supplied agent_name (duplicate kwarg).
        kwargs.setdefault("agent_name", f"ProductScraper_{site_name}")
        super().__init__(*args, **kwargs)
        self.site_name = site_name
|
||||||
|
|
||||||
|
|
||||||
|
class PriceMonitorAgent(StagehandAgent):
    """Specialized agent for monitoring price changes."""

    def __init__(self, *args, **kwargs):
        # setdefault lets callers override the default agent name; the
        # original form (agent_name=... plus **kwargs) raised TypeError
        # whenever the caller also supplied agent_name.
        kwargs.setdefault("agent_name", "PriceMonitorAgent")
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# Example 1: E-commerce Price Comparison Workflow
|
||||||
|
def create_price_comparison_workflow():
    """
    Create a workflow that compares prices across multiple e-commerce sites.

    Two StagehandAgent instances scrape Amazon and eBay concurrently; their
    combined output is then handed to an LLM analysis agent. Returns a
    SequentialWorkflow whose stages are [concurrent scraping, analysis].
    """
    # One browser agent per marketplace, identically configured.
    scrapers = [
        StagehandAgent(
            agent_name=scraper_name,
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        for scraper_name in ("AmazonScraperAgent", "EbayScraperAgent")
    ]

    # LLM-only agent that interprets the scraped prices.
    price_analyst = Agent(
        agent_name="PriceAnalysisAgent",
        model_name="gpt-4o-mini",
        system_prompt="""You are a price analysis expert. Analyze product prices from multiple sources
        and provide insights on the best deals, price trends, and recommendations.
        Focus on value for money and highlight any significant price differences.""",
    )

    # Scrape both sites in parallel...
    parallel_scrape = ConcurrentWorkflow(
        agents=scrapers,
        max_loops=1,
        verbose=True,
    )

    # ...then analyze the combined results.
    return SequentialWorkflow(
        agents=[parallel_scrape, price_analyst],
        max_loops=1,
        verbose=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Example 2: Competitive Analysis Workflow
|
||||||
|
def create_competitive_analysis_workflow():
    """
    Create a workflow for competitive analysis across multiple company websites.

    Pipeline: research each company's site, then analyze its social media
    presence, then compile a report. Returns an AgentRearrange workflow.
    """
    # Agent for extracting company information
    company_researcher = StagehandAgent(
        agent_name="CompanyResearchAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for analyzing social media presence
    social_media_agent = StagehandAgent(
        agent_name="SocialMediaAnalysisAgent",
        model_name="gpt-4o-mini",
        env="LOCAL",
    )

    # Agent for compiling competitive analysis report
    report_compiler = Agent(
        agent_name="CompetitiveAnalysisReporter",
        model_name="gpt-4o-mini",
        system_prompt="""You are a competitive analysis expert. Compile comprehensive reports
        based on company information and social media presence data. Identify strengths,
        weaknesses, and market positioning for each company.""",
    )

    # AgentRearrange resolves flow steps by each agent's agent_name, not by
    # the local Python variable name, so the pattern must use the names
    # configured above. (The original used the variable names, which match
    # no registered agent.)
    workflow_pattern = (
        "CompanyResearchAgent -> SocialMediaAnalysisAgent -> CompetitiveAnalysisReporter"
    )

    competitive_workflow = AgentRearrange(
        agents=[company_researcher, social_media_agent, report_compiler],
        flow=workflow_pattern,
        verbose=True,
    )

    return competitive_workflow
|
||||||
|
|
||||||
|
|
||||||
|
# Example 3: Automated Testing Workflow
|
||||||
|
def create_automated_testing_workflow():
    """
    Create a workflow for automated web application testing.

    Three browser agents (UI, form validation, accessibility) run their
    checks concurrently; a reporter agent then compiles the results.
    Returns a SequentialWorkflow of [concurrent testing, reporting].
    """
    # Identically-configured browser agents, one per testing concern.
    testers = [
        StagehandAgent(
            agent_name=tester_name,
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        for tester_name in (
            "UITestingAgent",
            "FormValidationAgent",
            "AccessibilityTestingAgent",
        )
    ]

    # LLM-only agent that summarizes the test output.
    test_reporter = Agent(
        agent_name="TestReportCompiler",
        model_name="gpt-4o-mini",
        system_prompt="""You are a QA test report specialist. Compile test results from
        UI, form validation, and accessibility testing into a comprehensive report.
        Highlight any failures, warnings, and provide recommendations for fixes.""",
    )

    # Run all testers concurrently, then generate the report.
    parallel_tests = ConcurrentWorkflow(
        agents=testers,
        max_loops=1,
        verbose=True,
    )

    return SequentialWorkflow(
        agents=[parallel_tests, test_reporter],
        max_loops=1,
        verbose=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Example 4: News Aggregation and Sentiment Analysis
|
||||||
|
def create_news_aggregation_workflow():
    """
    Create a workflow for news aggregation and sentiment analysis.

    Scrapes three tech-news sources concurrently, then runs sentiment
    analysis followed by trend identification over the aggregate.
    """
    # Source names drive the scraper agent names; the URLs are reference
    # data only — the agents receive the targets via their run() prompt.
    news_sites = [
        ("TechCrunch", "https://techcrunch.com"),
        ("HackerNews", "https://news.ycombinator.com"),
        ("Reddit", "https://reddit.com/r/technology"),
    ]

    news_scrapers = [
        StagehandAgent(
            agent_name=f"{site_name}Scraper",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )
        for site_name, _url in news_sites
    ]

    # Sentiment analysis agent
    sentiment_analyzer = Agent(
        agent_name="SentimentAnalyzer",
        model_name="gpt-4o-mini",
        system_prompt="""You are a sentiment analysis expert. Analyze news articles and posts
        to determine overall sentiment (positive, negative, neutral) and identify key themes
        and trends in the technology sector.""",
    )

    # Trend identification agent
    trend_identifier = Agent(
        agent_name="TrendIdentifier",
        model_name="gpt-4o-mini",
        system_prompt="""You are a trend analysis expert. Based on aggregated news and sentiment
        data, identify emerging trends, hot topics, and potential market movements in the
        technology sector.""",
    )

    # Parallel scraping feeds the two sequential analysis stages.
    scraping_stage = ConcurrentWorkflow(
        agents=news_scrapers,
        max_loops=1,
        verbose=True,
    )

    return SequentialWorkflow(
        agents=[scraping_stage, sentiment_analyzer, trend_identifier],
        max_loops=1,
        verbose=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Main execution examples
def _cleanup_browser_agents(workflow) -> None:
    """Close the browser of every StagehandAgent reachable from *workflow*.

    Recurses one level into nested workflows (objects exposing ``.agents``),
    which is how a ConcurrentWorkflow stage appears inside a
    SequentialWorkflow. Workflows without an ``agents`` attribute are a no-op.
    """
    for agent in getattr(workflow, "agents", []):
        if isinstance(agent, StagehandAgent):
            agent.cleanup()
        elif hasattr(agent, "agents"):  # nested workflow stage
            for sub_agent in agent.agents:
                if isinstance(sub_agent, StagehandAgent):
                    sub_agent.cleanup()


if __name__ == "__main__":
    print("=" * 70)
    print("Stagehand Multi-Agent Workflow Examples")
    print("=" * 70)

    # Example 1: Price Comparison
    print("\nExample 1: E-commerce Price Comparison")
    print("-" * 40)

    price_workflow = create_price_comparison_workflow()

    # Search for a specific product across multiple sites
    price_result = price_workflow.run(
        """Search for 'iPhone 15 Pro Max 256GB' on:
        1. Amazon - extract price, availability, and seller information
        2. eBay - extract price range, number of listings, and average price
        Take screenshots of search results from both sites.
        Compare the prices and provide recommendations on where to buy."""
    )
    print(f"Price Comparison Result:\n{price_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 2: Competitive Analysis
    print("Example 2: Competitive Analysis")
    print("-" * 40)

    competitive_workflow = create_competitive_analysis_workflow()

    competitive_result = competitive_workflow.run(
        """Analyze these three AI companies:
        1. OpenAI - visit openai.com and extract mission, products, and recent announcements
        2. Anthropic - visit anthropic.com and extract their AI safety approach and products
        3. DeepMind - visit deepmind.com and extract research focus and achievements

        Then check their Twitter/X presence and recent posts.
        Compile a competitive analysis report comparing their market positioning."""
    )
    print(f"Competitive Analysis Result:\n{competitive_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 3: Automated Testing
    print("Example 3: Automated Web Testing")
    print("-" * 40)

    testing_workflow = create_automated_testing_workflow()

    test_result = testing_workflow.run(
        """Test the website example.com:
        1. UI Testing: Check if all main navigation links work, images load, and layout is responsive
        2. Form Testing: If there are any forms, test with valid and invalid inputs
        3. Accessibility: Check for alt texts, ARIA labels, and keyboard navigation

        Take screenshots of any issues found and compile a comprehensive test report."""
    )
    print(f"Test Results:\n{test_result}")

    print("\n" + "=" * 70 + "\n")

    # Example 4: News Aggregation
    print("Example 4: Tech News Aggregation and Analysis")
    print("-" * 40)

    news_workflow = create_news_aggregation_workflow()

    news_result = news_workflow.run(
        """For each news source:
        1. TechCrunch: Extract the top 5 headlines about AI or machine learning
        2. HackerNews: Extract the top 5 posts related to AI/ML with most points
        3. Reddit r/technology: Extract top 5 posts about AI from the past week

        Analyze sentiment and identify emerging trends in AI technology."""
    )
    print(f"News Analysis Result:\n{news_result}")

    # Cleanup all browser instances. The original version only cleaned
    # price_workflow; every workflow that holds StagehandAgents must be
    # cleaned or its browser processes leak.
    print("\n" + "=" * 70)
    print("Cleaning up browser instances...")

    for wf in (price_workflow, competitive_workflow, testing_workflow, news_workflow):
        _cleanup_browser_agents(wf)

    print("All workflows completed!")
    print("=" * 70)
|
@ -0,0 +1,249 @@
|
|||||||
|
# Stagehand Browser Automation Integration for Swarms
|
||||||
|
|
||||||
|
This directory contains examples demonstrating how to integrate [Stagehand](https://github.com/browserbase/stagehand), an AI-powered browser automation framework, with the Swarms multi-agent framework.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Stagehand provides natural language browser automation capabilities that can be seamlessly integrated into Swarms agents. This integration enables:
|
||||||
|
|
||||||
|
- 🌐 **Natural Language Web Automation**: Use simple commands like "click the submit button" or "extract product prices"
|
||||||
|
- 🤖 **Multi-Agent Browser Workflows**: Multiple agents can automate different websites simultaneously
|
||||||
|
- 🔧 **Flexible Integration Options**: Use as a wrapped agent, individual tools, or via MCP server
|
||||||
|
- 📊 **Complex Automation Scenarios**: E-commerce monitoring, competitive analysis, automated testing, and more
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### 1. Stagehand Wrapper Agent (`1_stagehand_wrapper_agent.py`)
|
||||||
|
|
||||||
|
The simplest integration - wraps Stagehand as a Swarms-compatible agent.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from examples.stagehand.stagehand_wrapper_agent import StagehandAgent
|
||||||
|
|
||||||
|
# Create a browser automation agent
|
||||||
|
browser_agent = StagehandAgent(
|
||||||
|
agent_name="WebScraperAgent",
|
||||||
|
model_name="gpt-4o-mini",
|
||||||
|
env="LOCAL", # or "BROWSERBASE" for cloud execution
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use natural language to control the browser
|
||||||
|
result = browser_agent.run(
|
||||||
|
"Navigate to news.ycombinator.com and extract the top 5 story titles"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Inherits from Swarms `Agent` base class
|
||||||
|
- Automatic browser lifecycle management
|
||||||
|
- Natural language task interpretation
|
||||||
|
- Support for both local (Playwright) and cloud (Browserbase) execution
|
||||||
|
|
||||||
|
### 2. Stagehand as Tools (`2_stagehand_tools_agent.py`)
|
||||||
|
|
||||||
|
Provides fine-grained control by exposing Stagehand methods as individual tools.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from swarms import Agent
|
||||||
|
from examples.stagehand.stagehand_tools_agent import (
|
||||||
|
NavigateTool, ActTool, ExtractTool, ObserveTool, ScreenshotTool
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create agent with browser tools
|
||||||
|
browser_agent = Agent(
|
||||||
|
agent_name="BrowserAutomationAgent",
|
||||||
|
model_name="gpt-4o-mini",
|
||||||
|
tools=[
|
||||||
|
NavigateTool(),
|
||||||
|
ActTool(),
|
||||||
|
ExtractTool(),
|
||||||
|
ObserveTool(),
|
||||||
|
ScreenshotTool(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Agent can now use tools strategically
|
||||||
|
result = browser_agent.run(
|
||||||
|
"Go to google.com, search for 'Python tutorials', and extract the first 3 results"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Available Tools:**
|
||||||
|
- `NavigateTool`: Navigate to URLs
|
||||||
|
- `ActTool`: Perform actions (click, type, scroll)
|
||||||
|
- `ExtractTool`: Extract data from pages
|
||||||
|
- `ObserveTool`: Find elements on pages
|
||||||
|
- `ScreenshotTool`: Capture screenshots
|
||||||
|
- `CloseBrowserTool`: Clean up browser resources
|
||||||
|
|
||||||
|
### 3. Stagehand MCP Server (`3_stagehand_mcp_agent.py`)
|
||||||
|
|
||||||
|
Integrates with Stagehand's Model Context Protocol (MCP) server for standardized tool access.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from examples.stagehand.stagehand_mcp_agent import StagehandMCPAgent
|
||||||
|
|
||||||
|
# Connect to Stagehand MCP server
|
||||||
|
mcp_agent = StagehandMCPAgent(
|
||||||
|
agent_name="WebResearchAgent",
|
||||||
|
mcp_server_url="http://localhost:3000/sse",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use MCP tools including multi-session management
|
||||||
|
result = mcp_agent.run("""
|
||||||
|
Create 3 browser sessions and:
|
||||||
|
1. Session 1: Check Python.org for latest version
|
||||||
|
2. Session 2: Check PyPI for trending packages
|
||||||
|
3. Session 3: Check GitHub Python trending repos
|
||||||
|
Compile a Python ecosystem status report.
|
||||||
|
""")
|
||||||
|
```
|
||||||
|
|
||||||
|
**MCP Features:**
|
||||||
|
- Automatic tool discovery
|
||||||
|
- Multi-session browser management
|
||||||
|
- Built-in screenshot resources
|
||||||
|
- Prompt templates for common tasks
|
||||||
|
|
||||||
|
### 4. Multi-Agent Workflows (`4_stagehand_multi_agent_workflow.py`)
|
||||||
|
|
||||||
|
Demonstrates complex multi-agent browser automation scenarios.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from examples.stagehand.stagehand_multi_agent_workflow import (
|
||||||
|
create_price_comparison_workflow,
|
||||||
|
create_competitive_analysis_workflow,
|
||||||
|
create_automated_testing_workflow,
|
||||||
|
create_news_aggregation_workflow
|
||||||
|
)
|
||||||
|
|
||||||
|
# Price comparison across multiple e-commerce sites
|
||||||
|
price_workflow = create_price_comparison_workflow()
|
||||||
|
result = price_workflow.run(
|
||||||
|
"Compare prices for iPhone 15 Pro on Amazon and eBay"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Competitive analysis of multiple companies
|
||||||
|
competitive_workflow = create_competitive_analysis_workflow()
|
||||||
|
result = competitive_workflow.run(
|
||||||
|
"Analyze OpenAI, Anthropic, and DeepMind websites and social media"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Workflow Examples:**
|
||||||
|
- **E-commerce Monitoring**: Track prices across multiple sites
|
||||||
|
- **Competitive Analysis**: Research competitors' websites and social media
|
||||||
|
- **Automated Testing**: UI, form validation, and accessibility testing
|
||||||
|
- **News Aggregation**: Collect and analyze news from multiple sources
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
1. **Install Swarms and Stagehand:**
|
||||||
|
```bash
|
||||||
|
pip install swarms stagehand
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Set up environment variables:**
|
||||||
|
```bash
|
||||||
|
# For local browser automation (using Playwright)
|
||||||
|
export OPENAI_API_KEY="your-openai-key"
|
||||||
|
|
||||||
|
# For cloud browser automation (using Browserbase)
|
||||||
|
export BROWSERBASE_API_KEY="your-browserbase-key"
|
||||||
|
export BROWSERBASE_PROJECT_ID="your-project-id"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **For MCP Server examples:**
|
||||||
|
```bash
|
||||||
|
# Install and run the Stagehand MCP server
|
||||||
|
cd stagehand-mcp-server
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
npm start
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use Cases
|
||||||
|
|
||||||
|
### E-commerce Automation
|
||||||
|
- Price monitoring and comparison
|
||||||
|
- Inventory tracking
|
||||||
|
- Automated purchasing workflows
|
||||||
|
- Review aggregation
|
||||||
|
|
||||||
|
### Research and Analysis
|
||||||
|
- Competitive intelligence gathering
|
||||||
|
- Market research automation
|
||||||
|
- Social media monitoring
|
||||||
|
- News and trend analysis
|
||||||
|
|
||||||
|
### Quality Assurance
|
||||||
|
- Automated UI testing
|
||||||
|
- Cross-browser compatibility testing
|
||||||
|
- Form validation testing
|
||||||
|
- Accessibility compliance checking
|
||||||
|
|
||||||
|
### Data Collection
|
||||||
|
- Web scraping at scale
|
||||||
|
- Real-time data monitoring
|
||||||
|
- Structured data extraction
|
||||||
|
- Screenshot documentation
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **Resource Management**: Always clean up browser instances when done
|
||||||
|
```python
|
||||||
|
browser_agent.cleanup() # For wrapper agents
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Error Handling**: Stagehand includes self-healing capabilities, but wrap critical operations in try-except blocks
|
||||||
|
|
||||||
|
3. **Parallel Execution**: Use `ConcurrentWorkflow` for simultaneous browser automation across multiple sites
|
||||||
|
|
||||||
|
4. **Session Management**: For complex multi-page workflows, use the MCP server's session management capabilities
|
||||||
|
|
||||||
|
5. **Rate Limiting**: Be respectful of websites - add delays between requests when necessary
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
Run the test suite to verify the integration:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/stagehand/test_stagehand_integration.py -v
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **Browser not starting**: Ensure Playwright is properly installed
|
||||||
|
```bash
|
||||||
|
playwright install
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **MCP connection failed**: Verify the MCP server is running on the correct port
|
||||||
|
|
||||||
|
3. **Timeout errors**: Increase timeout in StagehandConfig or agent initialization
|
||||||
|
|
||||||
|
### Debug Mode
|
||||||
|
|
||||||
|
Enable verbose logging:
|
||||||
|
```python
|
||||||
|
agent = StagehandAgent(
|
||||||
|
agent_name="DebugAgent",
|
||||||
|
verbose=True, # Enable detailed logging
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
We welcome contributions! Please:
|
||||||
|
1. Follow the existing code style
|
||||||
|
2. Add tests for new features
|
||||||
|
3. Update documentation
|
||||||
|
4. Submit PRs with clear descriptions
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
These examples are provided under the same license as the Swarms framework. Stagehand is licensed separately - see [Stagehand's repository](https://github.com/browserbase/stagehand) for details.
|
@ -0,0 +1,13 @@
|
|||||||
|
# Requirements for Stagehand integration examples
|
||||||
|
swarms>=8.0.0
|
||||||
|
stagehand>=0.1.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
pydantic>=2.0.0
|
||||||
|
loguru>=0.7.0
|
||||||
|
|
||||||
|
# For MCP server examples (optional)
|
||||||
|
httpx>=0.24.0
|
||||||
|
|
||||||
|
# For testing
|
||||||
|
pytest>=7.0.0
|
||||||
|
pytest-asyncio>=0.21.0
|
@ -0,0 +1,356 @@
|
|||||||
|
"""
|
||||||
|
Tests for Stagehand Integration with Swarms
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
This module contains tests for the Stagehand browser automation
|
||||||
|
integration with the Swarms framework.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
from swarms import Agent
|
||||||
|
from swarms.tools.base_tool import BaseTool
|
||||||
|
|
||||||
|
|
||||||
|
# Mock Stagehand classes
|
||||||
|
class MockObserveResult:
    """Lightweight stand-in for a Stagehand ``observe()`` result entry.

    Carries the three attributes the integration code reads: a human-readable
    description, a CSS selector, and the suggested interaction method.
    """

    def __init__(self, description, selector, method="click"):
        self.description, self.selector, self.method = description, selector, method
|
||||||
|
|
||||||
|
|
||||||
|
class MockStagehandPage:
    """Fake Stagehand page implementing the async surface the agents call."""

    async def goto(self, url):
        # Navigation is a no-op in tests; returns None like the real API.
        return None

    async def act(self, action):
        # Echo the requested action back in a fixed message format.
        return "Performed action: {}".format(action)

    async def extract(self, query):
        # Canned extraction payload echoing the query.
        return {"extracted": query, "data": ["item1", "item2"]}

    async def observe(self, query):
        # Two canned elements: a search box and a submit button.
        results = [
            MockObserveResult("Search box", "#search-input"),
            MockObserveResult("Submit button", "#submit-btn"),
        ]
        return results
|
||||||
|
|
||||||
|
|
||||||
|
class MockStagehand:
    """Fake Stagehand client: records its config and exposes a mock page."""

    def __init__(self, config):
        # Keep the config for assertions; the page mimics the real attribute.
        self.config = config
        self.page = MockStagehandPage()

    async def init(self):
        # No real browser to launch.
        pass

    async def close(self):
        # No real browser to tear down.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Test StagehandAgent wrapper
|
||||||
|
class TestStagehandAgent:
    """Test the StagehandAgent wrapper class.

    Each test patches the wrapper module's ``Stagehand`` symbol with
    ``MockStagehand`` so no real browser is started.
    """

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_agent_initialization(self):
        """Test that StagehandAgent initializes correctly."""
        # Imported inside the test so construction happens under the patch.
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Config is stored but the browser is lazily initialized.
        assert agent.agent_name == "TestAgent"
        assert agent.stagehand_config.env == "LOCAL"
        assert agent.stagehand_config.model_name == "gpt-4o-mini"
        assert not agent._initialized

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_navigation_task(self):
        """Test navigation and extraction task."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run("Navigate to example.com and extract the main content")

        # Parse result — run() returns a JSON string describing the outcome.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert "navigated_to" in result_data["data"]
        # The wrapper normalizes the bare domain to an https URL.
        assert result_data["data"]["navigated_to"] == "https://example.com"
        assert "extracted" in result_data["data"]

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_search_task(self):
        """Test search functionality."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        result = agent.run("Go to google.com and search for 'test query'")

        # The wrapper reports the parsed query and classifies the action.
        result_data = json.loads(result)
        assert result_data["status"] == "completed"
        assert result_data["data"]["search_query"] == "test query"
        assert result_data["action"] == "search"

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_cleanup(self):
        """Test that cleanup properly closes browser."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        agent = StagehandAgent(
            agent_name="TestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Initialize the agent — first run() lazily starts the (mock) browser.
        agent.run("Navigate to example.com")
        assert agent._initialized

        # Cleanup
        agent.cleanup()

        # After cleanup, should be able to run again (re-initializes).
        result = agent.run("Navigate to example.com")
        assert result is not None
|
||||||
|
|
||||||
|
|
||||||
|
# Test Stagehand Tools
|
||||||
|
class TestStagehandTools:
    """Test individual Stagehand tools.

    Each test patches the tools module's shared ``browser_state`` so the
    tools operate against an AsyncMock page instead of a real browser.
    All tests are async and therefore carry ``@pytest.mark.asyncio`` —
    without the marker, pytest-asyncio's default strict mode does not run
    async test functions.
    """

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_navigate_tool(self, mock_browser_state):
        """Test NavigateTool functionality."""
        from examples.stagehand.stagehand_tools_agent import NavigateTool

        # Setup mock
        mock_page = AsyncMock()
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = NavigateTool()
        result = await tool._async_run("https://example.com")

        assert "Successfully navigated to https://example.com" in result
        mock_page.goto.assert_called_once_with("https://example.com")

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_act_tool(self, mock_browser_state):
        """Test ActTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ActTool

        # Setup mock
        mock_page = AsyncMock()
        mock_page.act = AsyncMock(return_value="Action completed")
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = ActTool()
        result = await tool._async_run("click the button")

        # The tool is expected to prefix its own "Action performed" message
        # around the page's raw return value.
        assert "Action performed" in result
        assert "click the button" in result
        mock_page.act.assert_called_once_with("click the button")

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_extract_tool(self, mock_browser_state):
        """Test ExtractTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ExtractTool

        # Setup mock
        mock_page = AsyncMock()
        mock_page.extract = AsyncMock(return_value={"title": "Test Page", "content": "Test content"})
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = ExtractTool()
        result = await tool._async_run("extract the page title")

        # Result should be JSON string
        parsed_result = json.loads(result)
        assert parsed_result["title"] == "Test Page"
        assert parsed_result["content"] == "Test content"

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_tools_agent.browser_state')
    async def test_observe_tool(self, mock_browser_state):
        """Test ObserveTool functionality."""
        from examples.stagehand.stagehand_tools_agent import ObserveTool

        # Setup mock
        mock_page = AsyncMock()
        mock_observations = [
            MockObserveResult("Search input", "#search"),
            MockObserveResult("Submit button", "#submit"),
        ]
        mock_page.observe = AsyncMock(return_value=mock_observations)
        mock_browser_state.get_page = AsyncMock(return_value=mock_page)
        mock_browser_state.init_browser = AsyncMock()

        tool = ObserveTool()
        result = await tool._async_run("find the search box")

        # Result should be JSON string
        parsed_result = json.loads(result)
        assert len(parsed_result) == 2
        assert parsed_result[0]["description"] == "Search input"
        assert parsed_result[0]["selector"] == "#search"
||||||
|
|
||||||
|
# Test MCP integration
|
||||||
|
class TestStagehandMCP:
    """Test Stagehand MCP server integration."""

    def test_mcp_agent_initialization(self):
        """The wrapper should forward its settings onto the inner Swarms agent."""
        from examples.stagehand.stagehand_mcp_agent import StagehandMCPAgent

        wrapper = StagehandMCPAgent(
            agent_name="TestMCPAgent",
            mcp_server_url="http://localhost:3000/sse",
            model_name="gpt-4o-mini",
        )

        inner = wrapper.agent
        assert inner.agent_name == "TestMCPAgent"
        assert inner.mcp_url == "http://localhost:3000/sse"
        assert inner.model_name == "gpt-4o-mini"

    def test_multi_session_swarm_creation(self):
        """A swarm of N agents should be created with role-based names."""
        from examples.stagehand.stagehand_mcp_agent import MultiSessionBrowserSwarm

        swarm = MultiSessionBrowserSwarm(
            mcp_server_url="http://localhost:3000/sse",
            num_agents=3,
        )

        assert len(swarm.agents) == 3
        # Names encode the role followed by the agent's index.
        expected = ["DataExtractor_0", "FormFiller_1", "WebMonitor_2"]
        for member, name in zip(swarm.agents, expected):
            assert member.agent_name == name

    @patch('swarms.Agent.run')
    def test_task_distribution(self, mock_run):
        """Every task should be dispatched to some agent and its result collected."""
        from examples.stagehand.stagehand_mcp_agent import MultiSessionBrowserSwarm

        mock_run.return_value = "Task completed"

        swarm = MultiSessionBrowserSwarm(num_agents=2)
        results = swarm.distribute_tasks(["Task 1", "Task 2", "Task 3"])

        # Three tasks across two agents: each task runs exactly once.
        assert len(results) == 3
        assert results == ["Task completed"] * 3
        assert mock_run.call_count == 3
||||||
|
# Test multi-agent workflows
|
||||||
|
class TestMultiAgentWorkflows:
    """Test multi-agent workflow configurations."""

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_price_comparison_workflow_creation(self):
        """Price-comparison workflow: concurrent scrapers feeding an analysis agent."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_price_comparison_workflow

        wf = create_price_comparison_workflow()

        # SequentialWorkflow with two stages.
        assert len(wf.agents) == 2
        # Stage 1 is itself a ConcurrentWorkflow (has a nested .agents).
        assert hasattr(wf.agents[0], 'agents')
        # Stage 2 is the analysis agent.
        assert wf.agents[1].agent_name == "PriceAnalysisAgent"

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_competitive_analysis_workflow_creation(self):
        """Competitive-analysis workflow: three agents in a rearrange pattern."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_competitive_analysis_workflow

        wf = create_competitive_analysis_workflow()

        assert len(wf.agents) == 3
        assert wf.flow == "company_researcher -> social_media_agent -> report_compiler"

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_automated_testing_workflow_creation(self):
        """Automated-testing workflow: concurrent testers followed by a second stage."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_automated_testing_workflow

        wf = create_automated_testing_workflow()

        # SequentialWorkflow with two stages.
        assert len(wf.agents) == 2
        # Stage 1 runs the UI, Form, and Accessibility testers concurrently.
        assert hasattr(wf.agents[0], 'agents')
        assert len(wf.agents[0].agents) == 3

    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    def test_news_aggregation_workflow_creation(self):
        """News-aggregation workflow: three sequential stages, scrapers first."""
        from examples.stagehand.stagehand_multi_agent_workflow import create_news_aggregation_workflow

        wf = create_news_aggregation_workflow()

        # SequentialWorkflow with three stages.
        assert len(wf.agents) == 3
        # Stage 1 scrapes the three news sources concurrently.
        assert hasattr(wf.agents[0], 'agents')
        assert len(wf.agents[0].agents) == 3
|
# Integration tests
|
||||||
|
class TestIntegration:
    """End-to-end integration tests."""

    @pytest.mark.asyncio
    @patch('examples.stagehand.stagehand_wrapper_agent.Stagehand', MockStagehand)
    async def test_full_browser_automation_flow(self):
        """Drive navigate → extract → observe through the wrapper agent."""
        from examples.stagehand.stagehand_wrapper_agent import StagehandAgent

        browser_agent = StagehandAgent(
            agent_name="IntegrationTestAgent",
            model_name="gpt-4o-mini",
            env="LOCAL",
        )

        # Each natural-language command should surface the corresponding
        # marker in the agent's textual result.
        commands = [
            ("Navigate to example.com", "navigated_to"),
            ("Extract all text from the page", "extracted"),
            ("Find all buttons on the page", "observation"),
        ]
        for command, marker in commands:
            assert marker in browser_agent.run(command)

        # Release the mocked browser resources.
        browser_agent.cleanup()
|
# Allow running this test module directly (`python <file>`) — invokes pytest
# on this file in verbose mode instead of requiring a separate pytest call.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
Loading…
Reference in new issue