Merge pull request #873 from harshalmore31/Fix/deep_research_swarm

Fixed #865 DeepResearchSwarm
pull/875/head
Kye Gomez 3 weeks ago committed by GitHub
commit 86facfd32a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,12 +1,18 @@
import asyncio import asyncio
import concurrent.futures import concurrent.futures
import json
import os import os
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
import aiohttp import aiohttp
from dotenv import load_dotenv from dotenv import load_dotenv
from rich.console import Console from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from rich.tree import Tree
from swarms.agents.reasoning_duo import ReasoningDuo from swarms.agents.reasoning_duo import ReasoningDuo
from swarms.structs.agent import Agent from swarms.structs.agent import Agent
@ -116,22 +122,38 @@ async def _async_exa_search(
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Asynchronous helper function for Exa.ai API requests""" """Asynchronous helper function for Exa.ai API requests"""
api_url = "https://api.exa.ai/search" api_url = "https://api.exa.ai/search"
# Check if API key is available
api_key = os.getenv("EXA_API_KEY")
if not api_key:
return {"error": "EXA_API_KEY environment variable not set"}
headers = { headers = {
"x-api-key": os.getenv("EXA_API_KEY"), "x-api-key": api_key,
"Content-Type": "application/json", "Content-Type": "application/json",
} }
# Filter out None keys AND None values from kwargs
safe_kwargs = {
str(k): v for k, v in kwargs.items()
if k is not None and v is not None and str(k) != "None"
}
payload = { payload = {
"query": query, "query": query,
"useAutoprompt": True, "useAutoprompt": True,
"numResults": kwargs.get("num_results", 10), "numResults": safe_kwargs.get("num_results", 10),
"contents": { "contents": {
"text": True, "text": True,
"highlights": {"numSentences": 2}, "highlights": {"numSentences": 2},
}, },
**kwargs,
} }
# Only add safe_kwargs if they don't conflict with existing keys
for key, value in safe_kwargs.items():
if key not in payload and key not in ["query", "useAutoprompt", "numResults", "contents"]:
payload[key] = value
try: try:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.post( async with session.post(
@ -370,24 +392,20 @@ class DeepResearchSwarm:
return [] return []
def _process_query(self, query: str) -> Tuple[str, str]: def _process_query(self, query: str) -> str:
""" """
Process a single query with search and reasoning. Process a single query with search only.
This function is designed to be run in a separate thread. This function is designed to be run in a separate thread.
Args: Args:
query (str): The query to process query (str): The query to process
Returns: Returns:
Tuple[str, str]: A tuple containing (search_results, reasoning_output) str: Search results
""" """
# Run the search # Run the search only - no individual reasoning to avoid duplication
results = exa_search(query) results = exa_search(query)
return results
# Run the reasoning on the search results
reasoning_output = self.reasoning_duo.run(results)
return (results, reasoning_output)
def step(self, query: str): def step(self, query: str):
""" """
@ -399,21 +417,28 @@ class DeepResearchSwarm:
Returns: Returns:
Formatted conversation history Formatted conversation history
""" """
try:
# Get all the queries to process # Get all the queries to process
queries = self.get_queries(query) queries = self.get_queries(query)
if not queries:
error_msg = "No queries generated. Please check your input."
self.conversation.add(role="System", content=error_msg)
return history_output_formatter(
self.conversation, type=self.output_type
)
# Submit all queries for concurrent processing # Submit all queries for concurrent processing
# Using a list instead of generator for clearer debugging
futures = [] futures = []
for q in queries: for q in queries:
future = self.executor.submit(self._process_query, q) future = self.executor.submit(self._process_query, q)
futures.append((q, future)) futures.append((q, future))
# Process results as they complete (no waiting for slower queries) # Process results as they complete
for q, future in futures: for q, future in futures:
try: try:
# Get results (blocks until this specific future is done) # Get search results only
results, reasoning_output = future.result() results = future.result()
# Add search results to conversation # Add search results to conversation
self.conversation.add( self.conversation.add(
@ -421,20 +446,17 @@ class DeepResearchSwarm:
content=f"Search results for {q}: \n {results}", content=f"Search results for {q}: \n {results}",
) )
# Add reasoning output to conversation
self.conversation.add(
role=self.reasoning_duo.agent_name,
content=reasoning_output,
)
except Exception as e: except Exception as e:
# Handle any errors in the thread # Handle any errors in the thread
error_msg = f"Error processing query '{q}': {str(e)}"
console.print(f"[bold red]{error_msg}[/bold red]")
self.conversation.add( self.conversation.add(
role="System", role="System",
content=f"Error processing query '{q}': {str(e)}", content=error_msg,
) )
# Once all query processing is complete, generate the final summary # Generate final comprehensive analysis after all searches are complete
# This step runs after all queries to ensure it summarizes all results try:
final_summary = self.reasoning_duo.run( final_summary = self.reasoning_duo.run(
f"Generate an extensive report of the following content: {self.conversation.get_str()}" f"Generate an extensive report of the following content: {self.conversation.get_str()}"
) )
@ -443,11 +465,38 @@ class DeepResearchSwarm:
role=self.reasoning_duo.agent_name, role=self.reasoning_duo.agent_name,
content=final_summary, content=final_summary,
) )
except Exception as e:
error_msg = f"Error generating final summary: {str(e)}"
console.print(f"[bold red]{error_msg}[/bold red]")
self.conversation.add(
role="System",
content=error_msg,
)
return history_output_formatter( # Return formatted output
result = history_output_formatter(
self.conversation, type=self.output_type self.conversation, type=self.output_type
) )
# If output type is JSON, ensure it's properly formatted
if self.output_type.lower() == "json":
try:
import json
if isinstance(result, str):
# Try to parse and reformat for pretty printing
parsed = json.loads(result)
return json.dumps(parsed, indent=2, ensure_ascii=False)
except (json.JSONDecodeError, TypeError):
# If parsing fails, return as-is
pass
return result
except Exception as e:
error_msg = f"Critical error in step execution: {str(e)}"
console.print(f"[bold red]{error_msg}[/bold red]")
return {"error": error_msg} if self.output_type.lower() == "json" else error_msg
def run(self, task: str): def run(self, task: str):
return self.step(task) return self.step(task)
@ -466,14 +515,113 @@ class DeepResearchSwarm:
future = self.executor.submit(self.step, task) future = self.executor.submit(self.step, task)
futures.append((task, future)) futures.append((task, future))
def parse_and_display_results(self, json_result: str, export_markdown: bool = True):
"""
Parse JSON results and display in rich format with optional markdown export.
Args:
json_result (str): JSON string containing conversation results
export_markdown (bool): Whether to export to markdown file
"""
try:
# Parse JSON
data = json.loads(json_result)
# Create rich display
console.print("\n" + "="*100, style="cyan")
console.print("🔬 DEEP RESEARCH RESULTS", style="bold cyan", justify="center")
console.print("="*100, style="cyan")
# Create conversation tree
tree = Tree("🗣️ Research Conversation", style="bold blue")
markdown_content = ["# Deep Research Results\n", f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"]
for i, entry in enumerate(data, 1):
if isinstance(entry, dict):
role = entry.get('role', 'Unknown')
content = entry.get('content', '')
timestamp = entry.get('timestamp', '')
# Get role info for display
role_info = self._get_role_display_info(role)
# Create tree branch
branch_text = f"{role_info['emoji']} {role}"
if timestamp:
time_part = timestamp.split()[-1] if ' ' in timestamp else timestamp[-8:]
branch_text += f" ({time_part})"
branch = tree.add(branch_text, style=role_info['style'])
# Add content preview to tree
content_preview = content[:150] + "..." if len(content) > 150 else content
content_preview = content_preview.replace('\n', ' ')
branch.add(content_preview, style="dim")
# Add to markdown
markdown_content.append(f"\n## {i}. {role}")
if timestamp:
markdown_content.append(f"**Timestamp:** {timestamp}")
markdown_content.append(f"\n{content}\n")
# Display full content for important entries
if role.lower() in ['reasoning-agent-01'] and len(content) > 300:
console.print(f"\n📋 {role} Full Response:", style="bold green")
console.print(Panel(content, border_style="green", title=f"{role} Analysis"))
# Display the tree
console.print(tree)
# Export to markdown if requested
if export_markdown:
# Create deepsearch_results directory
results_dir = Path("deepsearch_results")
results_dir.mkdir(exist_ok=True)
# Generate filename with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = results_dir / f"research_results_{timestamp}.md"
# Write markdown file
with open(filename, 'w', encoding='utf-8') as f:
f.write('\n'.join(markdown_content))
            console.print(f"\n💾 Results exported to: {filename}", style="bold green")
console.print("\n✅ Research analysis complete!", style="bold cyan")
except json.JSONDecodeError as e:
console.print(f"❌ Error parsing JSON: {e}", style="red")
except Exception as e:
console.print(f"❌ Error displaying results: {e}", style="red")
def _get_role_display_info(self, role: str) -> Dict[str, str]:
"""Get display information for different conversation roles."""
role_map = {
"user": {"emoji": "👤", "style": "cyan"},
"deep-research-agent": {"emoji": "🔍", "style": "blue"},
"reasoning-agent-01": {"emoji": "🧠", "style": "magenta"},
"system": {"emoji": "⚙️", "style": "yellow"},
}
# # Example usage role_lower = role.lower()
return role_map.get(role_lower, {"emoji": "🤖", "style": "white"})
# Example usage
# if __name__ == "__main__": # if __name__ == "__main__":
# try:
# swarm = DeepResearchSwarm( # swarm = DeepResearchSwarm(
# output_type="json", # output_type="json",
# ) # )
# print( # result = swarm.step(
# swarm.step( # "What is the active tariff situation with mexico? Only create 2 queries"
# "What is the active tarrif situation with mexico? Only create 2 queries"
# )
# ) # )
# # Parse and display results in rich format with markdown export
# swarm.parse_and_display_results(result, export_markdown=True)
# except Exception as e:
# print(f"Error running deep research swarm: {str(e)}")
# import traceback
# traceback.print_exc()

Loading…
Cancel
Save