parent
582edf427f
commit
bff7a18047
@ -1,186 +0,0 @@
|
||||
# Deep Research Swarm
|
||||
|
||||
!!! abstract "Overview"
|
||||
The Deep Research Swarm is a powerful, production-grade research system that conducts comprehensive analysis across multiple domains using parallel processing and advanced AI agents.
|
||||
|
||||
Key Features:
|
||||
|
||||
- Parallel search processing
|
||||
|
||||
- Multi-agent research coordination
|
||||
|
||||
- Advanced information synthesis
|
||||
|
||||
- Automated query generation
|
||||
|
||||
- Concurrent task execution
|
||||
|
||||
## Getting Started
|
||||
|
||||
!!! tip "Quick Installation"
|
||||
```bash
|
||||
pip install swarms
|
||||
```
|
||||
|
||||
=== "Basic Usage"
|
||||
```python
|
||||
from swarms.structs import DeepResearchSwarm
|
||||
|
||||
# Initialize the swarm
|
||||
swarm = DeepResearchSwarm(
|
||||
name="MyResearchSwarm",
|
||||
output_type="json",
|
||||
max_loops=1
|
||||
)
|
||||
|
||||
# Run a single research task
|
||||
results = swarm.run("What are the latest developments in quantum computing?")
|
||||
```
|
||||
|
||||
=== "Batch Processing"
|
||||
```python
|
||||
# Run multiple research tasks in parallel
|
||||
tasks = [
|
||||
"What are the environmental impacts of electric vehicles?",
|
||||
"How is AI being used in drug discovery?",
|
||||
]
|
||||
batch_results = swarm.batched_run(tasks)
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
!!! info "Constructor Arguments"
|
||||
| Parameter | Type | Default | Description |
|
||||
|-----------|------|---------|-------------|
|
||||
| `name` | str | "DeepResearchSwarm" | Name identifier for the swarm |
|
||||
| `description` | str | "A swarm that conducts..." | Description of the swarm's purpose |
|
||||
| `claude_summarization_model_name` | str | "claude-3-5-sonnet-20240620" | Model used for final summarization |
|
||||
| `max_loops` | int | 1 | Maximum number of research iterations |
|
||||
| `nice_print` | bool | True | Enable formatted console output |
|
||||
| `output_type` | str | "json" | Output format ("json" or "string") |
|
||||
| `max_workers` | int | CPU_COUNT * 2 | Maximum concurrent threads |
|
||||
| `token_count` | bool | False | Enable token counting |
|
||||
| `research_model_name` | str | "gpt-4o-mini" | Model to use for research |
|
||||
|
||||
## Core Methods
|
||||
|
||||
### Run
|
||||
!!! example "Single Task Execution"
|
||||
```python
|
||||
results = swarm.run("What are the latest breakthroughs in fusion energy?")
|
||||
```
|
||||
|
||||
### Batched Run
|
||||
!!! example "Parallel Task Execution"
|
||||
```python
|
||||
tasks = [
|
||||
"What are current AI safety initiatives?",
|
||||
"How is CRISPR being used in agriculture?",
|
||||
]
|
||||
results = swarm.batched_run(tasks)
|
||||
```
|
||||
|
||||
### Step
|
||||
!!! example "Single Step Execution"
|
||||
```python
|
||||
results = swarm.step("Analyze recent developments in renewable energy storage")
|
||||
```
|
||||
|
||||
## Domain-Specific Examples
|
||||
|
||||
=== "Scientific Research"
|
||||
```python
|
||||
science_swarm = DeepResearchSwarm(
|
||||
name="ScienceSwarm",
|
||||
output_type="json",
|
||||
max_loops=2 # More iterations for thorough research
|
||||
)
|
||||
|
||||
results = science_swarm.run(
|
||||
"What are the latest experimental results in quantum entanglement?"
|
||||
)
|
||||
```
|
||||
|
||||
=== "Market Research"
|
||||
```python
|
||||
market_swarm = DeepResearchSwarm(
|
||||
name="MarketSwarm",
|
||||
output_type="json"
|
||||
)
|
||||
|
||||
results = market_swarm.run(
|
||||
"What are the emerging trends in electric vehicle battery technology market?"
|
||||
)
|
||||
```
|
||||
|
||||
=== "News Analysis"
|
||||
```python
|
||||
news_swarm = DeepResearchSwarm(
|
||||
name="NewsSwarm",
|
||||
output_type="string" # Human-readable output
|
||||
)
|
||||
|
||||
results = news_swarm.run(
|
||||
"What are the global economic impacts of recent geopolitical events?"
|
||||
)
|
||||
```
|
||||
|
||||
=== "Medical Research"
|
||||
```python
|
||||
medical_swarm = DeepResearchSwarm(
|
||||
name="MedicalSwarm",
|
||||
max_loops=2
|
||||
)
|
||||
|
||||
results = medical_swarm.run(
|
||||
"What are the latest clinical trials for Alzheimer's treatment?"
|
||||
)
|
||||
```
|
||||
|
||||
## Advanced Features
|
||||
|
||||
??? note "Custom Research Agent"
|
||||
```python
|
||||
from swarms import Agent
|
||||
|
||||
custom_agent = Agent(
|
||||
agent_name="SpecializedResearcher",
|
||||
system_prompt="Your specialized prompt here",
|
||||
model_name="gpt-4"
|
||||
)
|
||||
|
||||
swarm = DeepResearchSwarm(
|
||||
research_agent=custom_agent,
|
||||
max_loops=2
|
||||
)
|
||||
```
|
||||
|
||||
??? note "Parallel Processing Control"
|
||||
```python
|
||||
swarm = DeepResearchSwarm(
|
||||
max_workers=8, # Limit to 8 concurrent threads
|
||||
nice_print=False # Disable console output for production
|
||||
)
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
!!! success "Recommended Practices"
|
||||
1. **Query Formulation**: Be specific and clear in your research queries
|
||||
2. **Resource Management**: Adjust `max_workers` based on your system's capabilities
|
||||
3. **Output Handling**: Use appropriate `output_type` for your use case
|
||||
4. **Error Handling**: Implement try-catch blocks around swarm operations
|
||||
5. **Model Selection**: Choose appropriate models based on research complexity
|
||||
|
||||
## Limitations
|
||||
|
||||
!!! warning "Known Limitations"
|
||||
|
||||
- Requires valid API keys for external services
|
||||
|
||||
- Performance depends on system resources
|
||||
|
||||
- Rate limits may apply to external API calls
|
||||
|
||||
- Token limits apply to model responses
|
||||
|
@ -1,10 +0,0 @@
|
||||
from swarms.structs.deep_research_swarm import DeepResearchSwarm


# Example: use a Groq-hosted DeepSeek model for the query-generation phase.
model = DeepResearchSwarm(
    research_model_name="groq/deepseek-r1-distill-qwen-32b"
)

model.run(
    # FIX: removed a stray trailing backtick that leaked into the prompt text.
    "What are the latest research papers on extending telomeres in humans? Give 1 queries for the search not too many"
)
|
@ -1,12 +0,0 @@
|
||||
from swarms.structs.deep_research_swarm import DeepResearchSwarm

# Example: single-loop research swarm answering one market question.
swarm = DeepResearchSwarm(
    name="Deep Research Swarm",
    description="A swarm that conducts comprehensive research across multiple domains",
    max_loops=1,
)

# Keep the task text in a named variable so it is easy to swap out.
task = "What are the biggest gas and oil companies in russia? Only provide 3 queries"
swarm.run(task)
|
@ -1,13 +0,0 @@
|
||||
from swarms.structs.deep_research_swarm import DeepResearchSwarm


def main():
    """Run a single deep-research query about AI and crypto news."""
    swarm = DeepResearchSwarm(
        name="Deep Research Swarm",
        description="A swarm of agents that can perform deep research on a given topic",
    )

    # FIX: corrected the prompt typo "AI an crypto" -> "AI and crypto".
    swarm.run("What are the latest news in the AI and crypto space")


# FIX: guard the entry point so importing this module no longer
# triggers a research run as a side effect.
if __name__ == "__main__":
    main()
|
@ -1,23 +0,0 @@
|
||||
from swarms.structs.deep_research_swarm import DeepResearchSwarm


def main():
    """Run one research query and print the human-readable results."""
    research_swarm = DeepResearchSwarm(
        name="Deep Research Swarm",
        description="A swarm of agents that can perform deep research on a given topic",
        output_type="string",  # string output is easier to read than JSON
    )

    # Phrase the task as a proper question for the research agent.
    query = "What are the latest developments and news in the AI and cryptocurrency space?"

    try:
        result = research_swarm.run(query)
    except Exception as e:
        print(f"Error occurred: {str(e)}")
    else:
        print("\nResearch Results:")
        print(result)


if __name__ == "__main__":
    main()
|
@ -1,13 +0,0 @@
|
||||
from swarms.structs.deep_research_swarm import DeepResearchSwarm

# Example: request the full conversation history ("all") instead of a
# single summary string.
swarm = DeepResearchSwarm(
    name="Deep Research Swarm",
    description="A swarm of agents that can perform deep research on a given topic",
    output_type="all",  # Change to string output type for better readability
)

question = (
    "What are the latest developments and news in the AI and cryptocurrency space?"
)
out = swarm.run(question)
print(out)
|
@ -0,0 +1,208 @@
|
||||
import os
|
||||
from typing import List, Union
|
||||
|
||||
from swarms.agents.create_agents_from_yaml import (
|
||||
ReturnTypes,
|
||||
create_agents_from_yaml,
|
||||
)
|
||||
from swarms.structs.agent import Agent
|
||||
from swarms.structs.csv_to_agent import AgentLoader as CSVAgentLoader
|
||||
from swarms.utils.agent_loader_markdown import (
|
||||
load_agent_from_markdown,
|
||||
load_agents_from_markdown,
|
||||
AgentLoader as MarkdownAgentLoader,
|
||||
)
|
||||
|
||||
|
||||
class AgentLoader:
    """
    Loader class for creating Agent objects from various file formats.

    Supports Markdown, YAML (``.yaml``/``.yml``), and CSV agent
    definitions, plus an :meth:`auto` dispatcher that selects the right
    loader from the file extension.
    """

    def __init__(self):
        """Initialize the AgentLoader instance (stateless; nothing to set up)."""
        pass

    def load_agents_from_markdown(
        self,
        file_paths: Union[str, List[str]],
        concurrent: bool = True,
        max_file_size_mb: float = 10.0,
        **kwargs,
    ) -> List[Agent]:
        """
        Load multiple agents from one or more Markdown files.

        Args:
            file_paths (Union[str, List[str]]): Path or list of paths to Markdown file(s) containing agent definitions.
            concurrent (bool, optional): Whether to load files concurrently. Defaults to True.
            max_file_size_mb (float, optional): Maximum file size in MB to process. Defaults to 10.0.
            **kwargs: Additional keyword arguments passed to the underlying loader.

        Returns:
            List[Agent]: A list of loaded Agent objects.
        """
        return load_agents_from_markdown(
            file_paths=file_paths,
            concurrent=concurrent,
            max_file_size_mb=max_file_size_mb,
            **kwargs,
        )

    def load_agent_from_markdown(
        self, file_path: str, **kwargs
    ) -> Agent:
        """
        Load a single agent from a Markdown file.

        Args:
            file_path (str): Path to the Markdown file containing the agent definition.
            **kwargs: Additional keyword arguments passed to the underlying loader.

        Returns:
            Agent: The loaded Agent object.
        """
        return load_agent_from_markdown(file_path=file_path, **kwargs)

    def load_agents_from_yaml(
        self,
        yaml_file: str,
        return_type: ReturnTypes = "auto",
        **kwargs,
    ) -> List[Agent]:
        """
        Load agents from a YAML file.

        Args:
            yaml_file (str): Path to the YAML file containing agent definitions.
            return_type (ReturnTypes, optional): The return type for the loader. Defaults to "auto".
            **kwargs: Additional keyword arguments passed to the underlying loader.

        Returns:
            List[Agent]: A list of loaded Agent objects.
        """
        return create_agents_from_yaml(
            yaml_file=yaml_file, return_type=return_type, **kwargs
        )

    def load_many_agents_from_yaml(
        self,
        yaml_files: List[str],
        return_types: List[ReturnTypes] = None,
        **kwargs,
    ) -> List[Agent]:
        """
        Load agents from multiple YAML files.

        Args:
            yaml_files (List[str]): List of YAML file paths containing agent definitions.
            return_types (List[ReturnTypes], optional): Return types, one per YAML
                file. Defaults to ["auto"]. If shorter than ``yaml_files``, the
                last entry is reused for the remaining files.
            **kwargs: Additional keyword arguments passed to the underlying loader.

        Returns:
            List[Agent]: A list of loaded Agent objects from all files.
        """
        # FIX: a mutable list default (["auto"]) is shared across calls;
        # use None/empty as the sentinel instead (backward compatible).
        if not return_types:
            return_types = ["auto"]
        return [
            self.load_agents_from_yaml(
                yaml_file=yaml_file,
                # FIX: reuse the last return type instead of raising
                # IndexError when fewer types than files are supplied.
                return_type=(
                    return_types[i]
                    if i < len(return_types)
                    else return_types[-1]
                ),
                **kwargs,
            )
            for i, yaml_file in enumerate(yaml_files)
        ]

    def load_agents_from_csv(
        self, csv_file: str, **kwargs
    ) -> List[Agent]:
        """
        Load agents from a CSV file.

        Args:
            csv_file (str): Path to the CSV file containing agent definitions.
            **kwargs: Additional keyword arguments passed to the underlying loader.

        Returns:
            List[Agent]: A list of loaded Agent objects.
        """
        loader = CSVAgentLoader(file_path=csv_file)
        return loader.load_agents()

    def auto(self, file_path: str, *args, **kwargs):
        """
        Automatically load agents from a file based on its extension.

        Args:
            file_path (str): Path to the agent file (Markdown, YAML, or CSV).
            *args: Additional positional arguments passed to the underlying loader.
            **kwargs: Additional keyword arguments passed to the underlying loader.

        Returns:
            List[Agent]: A list of loaded Agent objects.

        Raises:
            ValueError: If the file type is not supported.
        """
        if file_path.endswith(".md"):
            return self.load_agents_from_markdown(
                file_path, *args, **kwargs
            )
        # FIX: also accept the common ".yml" YAML extension.
        elif file_path.endswith((".yaml", ".yml")):
            return self.load_agents_from_yaml(
                file_path, *args, **kwargs
            )
        elif file_path.endswith(".csv"):
            return self.load_agents_from_csv(
                file_path, *args, **kwargs
            )
        else:
            raise ValueError(f"Unsupported file type: {file_path}")

    def load_single_agent(self, *args, **kwargs):
        """
        Load a single agent from a file of a supported type.

        Args:
            *args: Positional arguments passed to the underlying loader.
            **kwargs: Keyword arguments passed to the underlying loader.

        Returns:
            Agent: The loaded Agent object.
        """
        return self.auto(*args, **kwargs)

    def load_multiple_agents(
        self, file_paths: List[str], *args, **kwargs
    ):
        """
        Load multiple agents from a list of files of various supported types.

        Args:
            file_paths (List[str]): List of file paths to agent files (Markdown, YAML, or CSV).
            *args: Additional positional arguments passed to the underlying loader.
            **kwargs: Additional keyword arguments passed to the underlying loader.

        Returns:
            List[Agent]: A list of loaded Agent objects from all files.
        """
        return [
            self.auto(file_path, *args, **kwargs)
            for file_path in file_paths
        ]

    def parse_markdown_file(self, file_path: str):
        """
        Parse a Markdown file and return the agents defined within.

        Args:
            file_path (str): Path to the Markdown file.

        Returns:
            List[Agent]: A list of Agent objects parsed from the file.
        """
        return MarkdownAgentLoader(
            max_workers=os.cpu_count()
        ).parse_markdown_file(file_path=file_path)
|
@ -1,479 +0,0 @@
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
from typing import Any, List
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from rich.console import Console
|
||||
import requests
|
||||
|
||||
from swarms.structs.agent import Agent
|
||||
from swarms.structs.conversation import Conversation
|
||||
from swarms.utils.formatter import formatter
|
||||
from swarms.utils.history_output_formatter import (
|
||||
history_output_formatter,
|
||||
)
|
||||
from swarms.utils.str_to_dict import str_to_dict
|
||||
|
||||
console = Console()
|
||||
load_dotenv()
|
||||
|
||||
# Number of worker threads for concurrent operations
|
||||
MAX_WORKERS = (
|
||||
os.cpu_count() * 2
|
||||
) # Optimal number of workers based on CPU cores
|
||||
|
||||
|
||||
def exa_search(query: str, **kwargs: Any) -> str:
    """
    Perform a web search with the Exa.ai REST API and return the results
    as a Markdown-formatted string.

    Args:
        query (str): The search query to send to Exa.
        **kwargs: Extra payload fields (e.g. ``num_results``) forwarded to
            the API; ``None`` keys/values are dropped.

    Returns:
        str: Markdown text with one numbered entry per result, or a
        ``### Error`` section describing what went wrong. Never raises.
    """
    api_url = "https://api.exa.ai/search"
    api_key = os.getenv("EXA_API_KEY")

    if not api_key:
        return "### Error\nEXA_API_KEY environment variable not set\n"

    headers = {
        "x-api-key": api_key,
        "Content-Type": "application/json",
    }

    # Drop None keys/values so malformed kwargs cannot corrupt the payload.
    safe_kwargs = {
        str(k): v
        for k, v in kwargs.items()
        if k is not None and v is not None and str(k) != "None"
    }

    payload = {
        "query": query,
        "useAutoprompt": True,
        "numResults": safe_kwargs.get("num_results", 10),
        "contents": {
            "text": True,
            "highlights": {"numSentences": 10},
        },
    }

    # Forward any remaining custom fields that don't clash with the basics.
    for key, value in safe_kwargs.items():
        if key not in payload and key not in [
            "query",
            "useAutoprompt",
            "numResults",
            "contents",
        ]:
            payload[key] = value

    try:
        # FIX: requests has no default timeout; without one a stalled API
        # call would hang the worker thread forever.
        response = requests.post(
            api_url, json=payload, headers=headers, timeout=30
        )
        if response.status_code != 200:
            return f"### Error\nHTTP {response.status_code}: {response.text}\n"
        json_data = response.json()
    except Exception as e:
        return f"### Error\n{str(e)}\n"

    if "error" in json_data:
        return f"### Error\n{json_data['error']}\n"

    # Build the Markdown report.
    formatted_text = []
    search_params = json_data.get("effectiveFilters", {})
    query = search_params.get("query", "General web search")
    formatted_text.append(
        f"### Exa Search Results for: '{query}'\n\n---\n"
    )

    results = json_data.get("results", [])
    if not results:
        formatted_text.append("No results found.\n")
        return "".join(formatted_text)

    for i, result in enumerate(results, 1):
        title = result.get("title", "No title")
        url = result.get("url", result.get("id", "No URL"))
        published_date = result.get("publishedDate", "")
        highlights = result.get("highlights", [])
        # Keep at most three highlight snippets per result; highlights may
        # be dicts (with a "text" key) or plain strings.
        highlight_text = (
            "\n".join(
                (
                    h.get("text", str(h))
                    if isinstance(h, dict)
                    else str(h)
                )
                for h in highlights[:3]
            )
            if highlights
            else "No summary available"
        )

        formatted_text.extend(
            [
                f"{i}. **{title}**\n",
                f"   - URL: {url}\n",
                f"   - Published: {published_date.split('T')[0] if published_date else 'Date unknown'}\n",
                f"   - Key Points:\n    {highlight_text}\n\n",
            ]
        )

    return "".join(formatted_text)
|
||||
|
||||
|
||||
# Function-calling schema handed to the research agent: the agent must
# respond with a search depth plus a list of concrete search queries.
_SEARCH_TOPIC_PARAMETERS = {
    "type": "object",
    "properties": {
        "depth": {
            "type": "integer",
            "description": "Indicates the level of thoroughness for the search. Values range from 1 to 3, where 1 signifies a superficial search and 3 indicates an in-depth exploration of the topic.",
        },
        "detailed_queries": {
            "type": "array",
            "description": "An array of specific search queries generated based on the input query and the specified depth. Each query must be crafted to elicit detailed and relevant information from various sources.",
            "items": {
                "type": "string",
                "description": "Each item in this array must represent a unique search query targeting a specific aspect of the main topic, ensuring a comprehensive exploration of the subject matter.",
            },
        },
    },
    "required": ["depth", "detailed_queries"],
}

tools = [
    {
        "type": "function",
        "function": {
            "name": "search_topic",
            "description": "Conduct a thorough search on a specified topic or subtopic, generating a precise array of highly detailed search queries tailored to the input parameters.",
            "parameters": _SEARCH_TOPIC_PARAMETERS,
        },
    },
]
|
||||
|
||||
# System prompt for the query-generation ("research") agent; it answers
# through the `search_topic` function-calling schema.
RESEARCH_AGENT_PROMPT = """
You are an advanced research agent specialized in conducting deep, comprehensive research across multiple domains.
Your task is to:

1. Break down complex topics into searchable subtopics
2. Generate diverse search queries to explore each subtopic thoroughly
3. Identify connections and patterns across different areas of research
4. Synthesize findings into coherent insights
5. Identify gaps in current knowledge and suggest areas for further investigation

For each research task:
- Consider multiple perspectives and approaches
- Look for both supporting and contradicting evidence
- Evaluate the credibility and relevance of sources
- Track emerging trends and recent developments
- Consider cross-disciplinary implications

Output Format:
- Provide structured research plans
- Include specific search queries for each subtopic
- Prioritize queries based on relevance and potential impact
- Suggest follow-up areas for deeper investigation
"""

# System prompt for the summarization agent that writes the final report.
SUMMARIZATION_AGENT_PROMPT = """
You are an expert information synthesis and summarization agent designed for producing clear, accurate, and insightful summaries of complex information. Your core capabilities include:


Core Capabilities:
- Identify and extract key concepts, themes, and insights from any given content
- Recognize patterns, relationships, and hierarchies within information
- Filter out noise while preserving crucial context and nuance
- Handle multiple sources and perspectives simultaneously

Summarization Strategy
1. Multi-level Structure
- Provide an extensive summary
- Follow with key findings
- Include detailed insights with supporting evidence
- End with implications or next steps when relevant

2. Quality Standards
- Maintain factual accuracy and precision
- Preserve important technical details and terminology
- Avoid oversimplification of complex concepts
- Include quantitative data when available
- Cite or reference specific sources when summarizing claims

3. Clarity & Accessibility
- Use clear, concise language
- Define technical terms when necessary
- Structure information logically
- Use formatting to enhance readability
- Maintain appropriate level of technical depth for the audience

4. Synthesis & Analysis
- Identify conflicting information or viewpoints
- Highlight consensus across sources
- Note gaps or limitations in the information
- Draw connections between related concepts
- Provide context for better understanding

OUTPUT REQUIREMENTS:
- Begin with a clear statement of the topic or question being addressed
- Use consistent formatting and structure
- Clearly separate different levels of detail
- Include confidence levels for conclusions when appropriate
- Note any areas requiring additional research or clarification

Remember: Your goal is to make complex information accessible while maintaining accuracy and depth. Prioritize clarity without sacrificing important nuance or detail."""
|
||||
|
||||
|
||||
class DeepResearchSwarm:
    """
    Orchestrates deep research: a research agent expands the user's task
    into targeted search queries, the queries run concurrently through
    ``exa_search`` on a shared thread pool, and a summarization agent
    condenses the collected results into a final report.
    """

    def __init__(
        self,
        name: str = "DeepResearchSwarm",
        description: str = "A swarm that conducts comprehensive research across multiple domains",
        max_loops: int = 1,
        nice_print: bool = True,
        output_type: str = "json",
        max_workers: int = os.cpu_count()
        * 2,  # Let the system decide optimal thread count
        token_count: bool = False,
        research_model_name: str = "gpt-4o-mini",
        claude_summarization_model_name: str = "claude-3-5-sonnet-20240620",
    ):
        """
        Args:
            name: Identifier for the swarm.
            description: Human-readable purpose of the swarm.
            max_loops: Maximum research iterations; must be >= 1.
            nice_print: Enable formatted console output.
            output_type: Output format passed to the history formatter
                ("json" gets pretty-printed).
            max_workers: Size of the shared thread pool.
            token_count: Enable token counting on the conversation.
            research_model_name: Model for the query-generation agent.
            claude_summarization_model_name: Model for the summarizer.
        """
        self.name = name
        self.description = description
        self.max_loops = max_loops
        self.nice_print = nice_print
        self.output_type = output_type
        self.max_workers = max_workers
        self.research_model_name = research_model_name
        self.claude_summarization_model_name = (
            claude_summarization_model_name
        )

        self.reliability_check()
        self.conversation = Conversation(token_count=token_count)

        # Persistent ThreadPoolExecutor for the lifetime of the swarm —
        # eliminates thread-creation overhead on each query.
        self.executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.max_workers
        )

        # Agent that expands a task into detailed search queries.
        self.research_agent = Agent(
            agent_name="Deep-Research-Agent",
            agent_description="Specialized agent for conducting comprehensive research across multiple domains",
            system_prompt=RESEARCH_AGENT_PROMPT,
            max_loops=1,  # Allow multiple iterations for thorough research
            tools_list_dictionary=tools,
            model_name=self.research_model_name,
            output_type="final",
        )

        # Agent that condenses collected search results into a report.
        self.summarization_agent = Agent(
            agent_name="Summarization-Agent",
            agent_description="Specialized agent for summarizing research results",
            system_prompt=SUMMARIZATION_AGENT_PROMPT,
            max_loops=1,
            model_name=self.claude_summarization_model_name,
            output_type="final",
        )

    def __del__(self):
        """Clean up the executor on object destruction."""
        self.executor.shutdown(wait=False)

    def reliability_check(self):
        """Validate configuration before any work is done.

        Raises:
            ValueError: If ``max_loops`` is less than 1.
        """
        if self.max_loops < 1:
            raise ValueError("max_loops must be greater than 0")

        formatter.print_panel(
            "DeepResearchSwarm is booting up...", "blue"
        )
        formatter.print_panel("Reliability check passed", "green")

    def get_queries(self, query: str) -> List[str]:
        """
        Generate a list of detailed search queries based on the input query.

        Args:
            query (str): The main research query to explore

        Returns:
            List[str]: A list of detailed search queries (empty if the
            agent's output contained no "detailed_queries" key).
        """
        self.conversation.add(role="User", content=query)

        # Get the agent's response
        agent_output = self.research_agent.run(query)

        # Transform the string into a list of dictionaries
        agent_output = json.loads(agent_output)
        print(agent_output)
        print(type(agent_output))

        formatter.print_panel(
            f"Agent output type: {type(agent_output)} \n {agent_output}",
            "blue",
        )

        # Convert the output to a dictionary if it's a list
        if isinstance(agent_output, list):
            agent_output = json.dumps(agent_output)

        if isinstance(agent_output, str):
            # Convert the string output to dictionary
            output_dict = str_to_dict(agent_output)
        else:
            # FIX: a dict output previously left output_dict unbound,
            # raising NameError below.
            output_dict = agent_output

        # FIX: default to an empty list so a missing "detailed_queries"
        # key can no longer raise UnboundLocalError.
        queries = []

        # Extract the detailed queries from the output
        # Search for the key "detailed_queries" in the output list[dictionary]
        if isinstance(output_dict, list):
            for item in output_dict:
                if "detailed_queries" in item:
                    queries = item["detailed_queries"]
                    break
        else:
            queries = output_dict.get("detailed_queries", [])

        print(queries)

        # Log the number of queries generated
        formatter.print_panel(
            f"Generated {len(queries)} queries", "blue"
        )

        print(queries)
        print(type(queries))

        return queries

    def step(self, query: str):
        """
        Execute a single research step with maximum parallelism.

        Args:
            query (str): The research query to process

        Returns:
            Formatted conversation history (pretty-printed JSON string when
            ``output_type`` is "json"), or an error payload on failure.
        """
        try:
            # Expand the task into concrete search queries.
            queries = self.get_queries(query)

            print(queries)

            # Submit all queries for concurrent processing
            futures = []
            for q in queries:
                future = self.executor.submit(exa_search, q)
                futures.append((q, future))

            # Process results as they complete
            for q, future in futures:
                try:
                    # Get search results only
                    results = future.result()

                    # Add search results to conversation
                    self.conversation.add(
                        role="User",
                        content=f"Search results for {q}: \n {results}",
                    )

                except Exception as e:
                    # Handle any errors in the thread
                    error_msg = (
                        f"Error processing query '{q}': {str(e)}"
                    )
                    console.print(f"[bold red]{error_msg}[/bold red]")
                    self.conversation.add(
                        role="System",
                        content=error_msg,
                    )

            # Generate final comprehensive analysis after all searches are complete
            try:
                final_summary = self.summarization_agent.run(
                    f"Please generate a comprehensive 4,000-word report analyzing the following content: {self.conversation.get_str()}"
                )

                self.conversation.add(
                    role=self.summarization_agent.agent_name,
                    content=final_summary,
                )
            except Exception as e:
                error_msg = (
                    f"Error generating final summary: {str(e)}"
                )
                console.print(f"[bold red]{error_msg}[/bold red]")
                self.conversation.add(
                    role="System",
                    content=error_msg,
                )

            # Return formatted output
            result = history_output_formatter(
                self.conversation, type=self.output_type
            )

            # If output type is JSON, ensure it's properly formatted.
            # (FIX: removed a redundant local `import json`; json is
            # already imported at module level.)
            if self.output_type.lower() == "json":
                try:
                    if isinstance(result, str):
                        # Try to parse and reformat for pretty printing
                        parsed = json.loads(result)
                        return json.dumps(
                            parsed, indent=2, ensure_ascii=False
                        )
                except (json.JSONDecodeError, TypeError):
                    # If parsing fails, return as-is
                    pass

            return result

        except Exception as e:
            error_msg = f"Critical error in step execution: {str(e)}"
            console.print(f"[bold red]{error_msg}[/bold red]")
            return (
                {"error": error_msg}
                if self.output_type.lower() == "json"
                else error_msg
            )

    def run(self, task: str):
        """Run a full research cycle for a single task (alias for step)."""
        return self.step(task)

    def batched_run(self, tasks: List[str]):
        """
        Execute a list of research tasks in parallel.

        Args:
            tasks (List[str]): A list of research tasks to execute

        Returns:
            List[str]: Formatted conversation histories, one per task,
            in the same order as ``tasks``.
        """
        # Submit every task to the shared executor first so they run
        # concurrently, then collect results in input order.
        futures = [
            (task, self.executor.submit(self.step, task))
            for task in tasks
        ]

        # FIX: the original submitted the futures but never collected or
        # returned them, so batched_run always returned None.
        results = []
        for task, future in futures:
            try:
                results.append(future.result())
            except Exception as e:
                error_msg = f"Error processing task '{task}': {str(e)}"
                console.print(f"[bold red]{error_msg}[/bold red]")
                results.append(error_msg)
        return results
|
||||
|
||||
|
||||
# Example usage
|
||||
# if __name__ == "__main__":
|
||||
# try:
|
||||
# swarm = DeepResearchSwarm(
|
||||
# output_type="json",
|
||||
# )
|
||||
# result = swarm.step(
|
||||
# "What is the active tariff situation with mexico? Only create 2 queries"
|
||||
# )
|
||||
|
||||
# # Parse and display results in rich format with markdown export
|
||||
# swarm.parse_and_display_results(result, export_markdown=True)
|
||||
|
||||
# except Exception as e:
|
||||
# print(f"Error running deep research swarm: {str(e)}")
|
||||
# import traceback
|
||||
# traceback.print_exc()
|
Loading…
Reference in new issue