Web scraper agents are specialized AI agents that can automatically extract and process information from websites. These agents combine the power of large language models with web scraping tools to intelligently gather, analyze, and structure data from the web.
These agents can:

| Capability | Description |
|------------|-------------|
| **Automatically navigate websites** | Extract relevant information from web pages |
| **Parse and structure data** | Convert HTML content into readable, structured formats |
| **Handle dynamic content** | Process JavaScript-rendered pages and dynamic website elements |
| **Provide intelligent summaries and analysis** | Generate summaries and analyze the scraped content |
| **Scale to multiple websites simultaneously** | Scrape and process data from several websites at once for comprehensive research |
## Install
```bash
pip3 install -U swarms swarms-tools
```
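## Environment Setup
Make your OpenAI API key available as an environment variable, for example by adding the following line to a `.env` file that your project loads, or by prefixing it with `export` in your shell: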
```bash
OPENAI_API_KEY="your_openai_api_key_here"
```
## Basic Usage
Here's a simple example of how to create a web scraper agent:
```python
from swarms import Agent
from swarms_tools import scrape_and_format_sync
# Configure an agent and hand it the scraping helper as a tool.
agent = Agent(
    agent_name="Web Scraper Agent",
    model_name="gpt-4o-mini",
    tools=[scrape_and_format_sync],
    dynamic_context_window=True,
    dynamic_temperature_enabled=True,
    max_loops=1,
    system_prompt="You are a web scraper agent. You are given a URL and you need to scrape the website and return the data in a structured format. The format type should be full",
)

# Run a single scraping task and print whatever the agent returns.
out = agent.run(
    "Scrape swarms.ai website and provide a full report of what the company does. The format type should be full."
)
print(out)
```
## Scraping Multiple Sites
For comprehensive research, you can scrape multiple websites simultaneously using batch execution:
```python
from swarms.structs.multi_agent_exec import batched_grid_agent_execution
from swarms_tools import scrape_and_format_sync
from swarms import Agent
# Instructions given to the scraper agent.
scraper_system_prompt = "You are a web scraper agent. You are given a URL and you need to scrape the website and return the data in a structured format. The format type should be full"

agent = Agent(
    agent_name="Web Scraper Agent",
    model_name="gpt-4o-mini",
    tools=[scrape_and_format_sync],
    dynamic_context_window=True,
    dynamic_temperature_enabled=True,
    max_loops=1,
    system_prompt=scraper_system_prompt,
)

# One task per target website.
site_tasks = [
    "Scrape swarms.ai website and provide a full report of the company's mission, products, and team. The format type should be full.",
    "Scrape langchain.com website and provide a full report of the company's mission, products, and team. The format type should be full.",
]

# The same agent instance is listed once per task.
out = batched_grid_agent_execution(
    agents=[agent] * len(site_tasks),
    tasks=site_tasks,
)
print(out)
```
## Conclusion
Web scraper agents combine AI with advanced automation to efficiently gather and process web data at scale. As you master the basics, explore features like batch processing and custom tools to unlock the full power of AI-driven web scraping.
system_prompt="You are a web scraper agent. You are given a URL and you need to scrape the website and return the data in a structured format. The format type should be full",
)
out=batched_grid_agent_execution(
agents=[agent,agent],
tasks=[
"Scrape swarms.ai website and provide a full report of the company's mission, products, and team. The format type should be full.",
"Scrape langchain.com website and provide a full report of the company's mission, products, and team. The format type should be full.",
system_prompt="You are a web scraper agent. You are given a URL and you need to scrape the website and return the data in a structured format. The format type should be full",
)
out=agent.run(
"Scrape swarms.ai website and provide a full report of the company does. The format type should be full."
logger.debug(f"[SCHEMA] Director schema: {schema}")
returnAgent(
agent_name=self.director_name,
@ -923,7 +924,7 @@ class HierarchicalSwarm:
)
exceptExceptionase:
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
logger.error(error_msg)
defreliability_checks(self):
@ -963,7 +964,7 @@ class HierarchicalSwarm:
)
exceptExceptionase:
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
logger.error(error_msg)
defagents_no_print(self):
@ -995,7 +996,9 @@ class HierarchicalSwarm:
"""
try:
ifself.verbose:
logger.info(f"🎯 Running director with task: {task}")
logger.info(
f"[RUN] Running director with task: {task}"
)
ifself.planning_director_agentisnotNone:
plan=self.planning_director_agent.run(
@ -1022,15 +1025,17 @@ class HierarchicalSwarm:
)
ifself.verbose:
logger.success("✅ Director execution completed")
logger.success(
"[SUCCESS] Director execution completed"
)
logger.debug(
f"📋 Director output type: {type(function_call)}"
f"[OUTPUT] Director output type: {type(function_call)}"
)
returnfunction_call
exceptExceptionase:
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
logger.error(error_msg)
raisee
@ -1059,7 +1064,7 @@ class HierarchicalSwarm:
try:
ifself.verbose:
logger.info(
f"👣 Executing single step for task: {task}"
f"[STEP] Executing single step for task: {task}"
)
# Update dashboard for director execution
@ -1073,7 +1078,7 @@ class HierarchicalSwarm:
ifself.verbose:
logger.info(
f"📋 Parsed plan and {len(orders)} orders"
f"[PARSE] Parsed plan and {len(orders)} orders"
)
# Update dashboard with plan and orders information
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
logger.error(error_msg)
current_loop+=1
@ -1218,10 +1224,10 @@ class HierarchicalSwarm:
ifself.verbose:
logger.success(
f"🎉 Hierarchical swarm run completed: {self.name}"
f"[COMPLETE] Hierarchical swarm run completed: {self.name}"
)
logger.info(
f"📊 Total loops executed: {current_loop}"
f"[STATS] Total loops executed: {current_loop}"
)
returnhistory_output_formatter(
@ -1234,7 +1240,7 @@ class HierarchicalSwarm:
self.dashboard.update_director_status("ERROR")
self.dashboard.stop()
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
logger.error(error_msg)
def_get_interactive_task(self)->str:
@ -1275,7 +1281,7 @@ class HierarchicalSwarm:
"""
try:
ifself.verbose:
logger.info("📝 Generating director feedback")
logger.info("[FEEDBACK] Generating director feedback")
"[SUCCESS] Director feedback generated successfully"
)
returnoutput
exceptExceptionase:
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
f"✅ Successfully parsed plan and {len(orders)} orders"
f"[SUCCESS] Successfully parsed plan and {len(orders)} orders"
)
returnplan,orders
@ -1463,7 +1469,7 @@ class HierarchicalSwarm:
)asjson_err:
ifself.verbose:
logger.warning(
f"⚠️ JSON decode error: {json_err}"
f"[WARN] JSON decode error: {json_err}"
)
pass
# Check if it's a direct function call format
@ -1488,7 +1494,7 @@ class HierarchicalSwarm:
ifself.verbose:
logger.success(
f"✅ Successfully parsed plan and {len(orders)} orders"
f"[SUCCESS] Successfully parsed plan and {len(orders)} orders"
)
returnplan,orders
@ -1497,7 +1503,7 @@ class HierarchicalSwarm:
)asjson_err:
ifself.verbose:
logger.warning(
f"⚠️ JSON decode error: {json_err}"
f"[WARN] JSON decode error: {json_err}"
)
pass
# If no function call found, raise error
@ -1515,7 +1521,7 @@ class HierarchicalSwarm:
ifself.verbose:
logger.success(
f"✅ Successfully parsed plan and {len(orders)} orders"
f"[SUCCESS] Successfully parsed plan and {len(orders)} orders"
)
returnplan,orders
@ -1529,7 +1535,7 @@ class HierarchicalSwarm:
)
exceptExceptionase:
error_msg=f"❌ Failed to parse orders: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to parse orders: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
f"📋 Executing order {i+1}/{len(orders)}: {order.agent_name}"
f"[ORDER] Executing order {i+1}/{len(orders)}: {order.agent_name}"
)
# Update dashboard for agent execution
@ -1590,13 +1596,13 @@ class HierarchicalSwarm:
ifself.verbose:
logger.success(
f"✅ All {len(orders)} orders executed successfully"
f"[SUCCESS] All {len(orders)} orders executed successfully"
)
returnoutputs
exceptExceptionase:
error_msg=f"❌ Failed to setup director: {str(e)}\n🔍 Traceback: {traceback.format_exc()}\n🐛 If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"
error_msg=f"[ERROR] Failed to setup director: {str(e)}\n[TRACE] Traceback: {traceback.format_exc()}\n[BUG] If this issue persists, please report it at: https://github.com/kyegomez/swarms/issues"