From ccf7d2f0ecebb42f288e875dc03bc67d360414ca Mon Sep 17 00:00:00 2001 From: Kye Gomez Date: Fri, 11 Jul 2025 10:15:35 -0700 Subject: [PATCH] docs paper implementations --- docs/examples/paper_implementations.md | 168 ++++++++++++ docs/mkdocs.yml | 1 + hiearchical_marketing_team.py | 347 +++++++++++++++++++++++++ swarms/structs/__init__.py | 3 + swarms/structs/agent.py | 2 +- swarms/structs/hiearchical_swarm.py | 37 ++- swarms/utils/litellm_wrapper.py | 62 +++-- tests/utils/test_litellm_wrapper.py | 56 ++-- 8 files changed, 630 insertions(+), 46 deletions(-) create mode 100644 docs/examples/paper_implementations.md create mode 100644 hiearchical_marketing_team.py diff --git a/docs/examples/paper_implementations.md b/docs/examples/paper_implementations.md new file mode 100644 index 00000000..63e9b691 --- /dev/null +++ b/docs/examples/paper_implementations.md @@ -0,0 +1,168 @@ +# Multi-Agent Paper Implementations + +At Swarms, we are passionate about democratizing access to cutting-edge multi-agent research and making advanced AI collaboration accessible to everyone. Our mission is to bridge the gap between academic research and practical implementation by providing production-ready, open-source implementations of the most impactful multi-agent research papers. + +### Why Multi-Agent Research Matters + +Multi-agent systems represent the next evolution in artificial intelligence, moving beyond single-agent limitations to harness the power of collective intelligence. 
These systems can: + +- **Overcome Individual Agent Constraints**: Address memory limitations, hallucinations, and single-task focus through collaborative problem-solving +- **Achieve Superior Performance**: Combine specialized expertise across multiple agents to tackle complex, multifaceted challenges +- **Enable Scalable Solutions**: Distribute computational load and scale efficiently across multiple agents +- **Foster Innovation**: Create novel approaches through agent interaction and knowledge sharing + +### Our Research Implementation Philosophy + +We believe that the best way to advance the field is through practical implementation and real-world validation. Our approach includes: + +- **Faithful Reproduction**: Implementing research papers with high fidelity to original methodologies + +- **Production Enhancement**: Adding enterprise-grade features like error handling, monitoring, and scalability + +- **Open Source Commitment**: Making all implementations freely available to the research community + +- **Continuous Improvement**: Iterating on implementations based on community feedback and new research + +### What You'll Find Here + +This documentation showcases our comprehensive collection of multi-agent research implementations, including: + + +- **Academic Paper Implementations**: Direct implementations of published research papers + +- **Enhanced Frameworks**: Production-ready versions with additional features and optimizations + +- **Research Compilations**: Curated lists of influential multi-agent papers and resources + +- **Practical Examples**: Ready-to-use code examples and tutorials + +Whether you're a researcher looking to validate findings, a developer building production systems, or a student learning about multi-agent AI, you'll find valuable resources here to advance your work. 
+ +### Join the Multi-Agent Revolution + +We invite you to explore these implementations, contribute to our research efforts, and help shape the future of collaborative AI. Together, we can unlock the full potential of multi-agent systems and create AI that truly works as a team. + +## Implemented Research Papers + +| Paper Name | Description | Original Paper | Implementation | Status | Key Features | +|------------|-------------|----------------|----------------|--------|--------------| +| **MALT (Multi-Agent LLM Training)** | A sophisticated orchestration framework that coordinates multiple specialized AI agents to tackle complex tasks through structured conversations. | [arXiv:2412.01928](https://arxiv.org/pdf/2412.01928) | [`swarms.structs.malt`](https://docs.swarms.world/en/latest/swarms/structs/malt/) | ✅ Complete | Creator-Verifier-Refiner architecture, structured conversations, reliability guarantees | +| **MAI-DxO (MAI Diagnostic Orchestrator)** | An open-source implementation of Microsoft Research's "Sequential Diagnosis with Language Models" paper, simulating a virtual panel of physician-agents for iterative medical diagnosis. | Microsoft Research Paper | [GitHub Repository](https://github.com/The-Swarm-Corporation/Open-MAI-Dx-Orchestrator) | ✅ Complete | Cost-effective medical diagnosis, physician-agent panel, iterative refinement | +| **AI-CoScientist** | A multi-agent AI framework for collaborative scientific research, implementing the "Towards an AI Co-Scientist" methodology with tournament-based hypothesis evolution. | "Towards an AI Co-Scientist" Paper | [GitHub Repository](https://github.com/The-Swarm-Corporation/AI-CoScientist) | ✅ Complete | Tournament-based selection, peer review systems, hypothesis evolution, Elo rating system | +| **Mixture of Agents (MoA)** | A sophisticated multi-agent architecture that implements parallel processing with iterative refinement, combining diverse expert agents for comprehensive analysis. 
| Multi-agent collaboration concepts | [`swarms.structs.moa`](https://docs.swarms.world/en/latest/swarms/structs/moa/) | ✅ Complete | Parallel processing, expert agent combination, iterative refinement, state-of-the-art performance | +| **Open Scientist** | A multi-agent system for scientific research exploration using specialized agents for hypothesis generation, peer review, ranking, evolution, and meta-analysis. | Scientific research methodology | [`examples/demos/open_scientist.py`](https://github.com/kyegomez/swarms/blob/main/examples/demos/open_scientist.py) | ✅ Complete | Hypothesis generation, peer review, ranking, evolution, meta-analysis, proximity control | +| **Deep Research Swarm** | A production-grade research system that conducts comprehensive analysis across multiple domains using parallel processing and advanced AI agents. | Research methodology | [`swarms.structs.deep_research_swarm`](https://docs.swarms.world/en/latest/swarms/structs/deep_research_swarm/) | ✅ Complete | Parallel search processing, multi-agent coordination, information synthesis, concurrent execution | +| **Agent-as-a-Judge** | An evaluation framework that uses agents to evaluate other agents, implementing the "Agent-as-a-Judge: Evaluate Agents with Agents" methodology. 
| [arXiv:2410.10934](https://arxiv.org/abs/2410.10934) | [`swarms.agents.agent_judge`](https://docs.swarms.world/en/latest/swarms/agents/agent_judge/) | ✅ Complete | Agent evaluation, quality assessment, automated judging, performance metrics | + +## Additional Research Resources + +### Multi-Agent Papers Compilation + +We maintain a comprehensive list of multi-agent research papers at: [awesome-multi-agent-papers](https://github.com/kyegomez/awesome-multi-agent-papers) + +### Research Lists + +Our research compilation includes: + +- **Projects**: ModelScope-Agent, Gorilla, BMTools, LMQL, Langchain, MetaGPT, AutoGPT, and more + +- **Research Papers**: BOLAA, ToolLLM, Communicative Agents, Mind2Web, Voyager, Tree of Thoughts, and many others + +- **Blog Articles**: Latest insights and developments in autonomous agents + +- **Talks**: Presentations from leading researchers like Geoffrey Hinton and Andrej Karpathy + + +## Implementation Details + +### MALT Framework + +The MALT implementation provides: + +- **Three-Agent Architecture**: Creator, Verifier, and Refiner agents + +- **Structured Workflow**: Coordinated task execution with conversation history + +- **Reliability Features**: Error handling, validation, and quality assurance + +- **Extensibility**: Custom agent integration and configuration options + + +### MAI-DxO System + +The MAI Diagnostic Orchestrator features: + +- **Virtual Physician Panel**: Multiple specialized medical agents + +- **Cost Optimization**: Efficient diagnostic workflows + +- **Iterative Refinement**: Continuous improvement of diagnoses + +- **Medical Expertise**: Domain-specific knowledge and reasoning + + +### AI-CoScientist Framework + +The AI-CoScientist implementation includes: + +- **Tournament-Based Selection**: Elo rating system for hypothesis ranking + +- **Peer Review System**: Comprehensive evaluation of scientific proposals + +- **Hypothesis Evolution**: Iterative refinement based on feedback + +- **Diversity Control**: 
Proximity analysis to maintain hypothesis variety + + +### Mixture of Agents (MoA) + +The MoA architecture provides: + +- **Parallel Processing**: Multiple agents working simultaneously + +- **Expert Specialization**: Domain-specific agent capabilities + +- **Iterative Refinement**: Continuous improvement through collaboration + +- **State-of-the-Art Performance**: Achieving superior results through collective intelligence + + + +## Contributing + +We welcome contributions to implement additional research papers! If you'd like to contribute: + +1. **Identify a paper**: Choose a relevant multi-agent research paper +2. **Propose implementation**: Submit an issue with your proposal +3. **Implement**: Create the implementation following our guidelines +4. **Document**: Add comprehensive documentation and examples +5. **Test**: Ensure robust testing and validation + +## Citation + +If you use any of these implementations in your research, please cite the original papers and the Swarms framework: + +```bibtex +@misc{SWARMS_2022, + author = {Gomez, Kye and Pliny and More, Harshal and Swarms Community}, + title = {{Swarms: Production-Grade Multi-Agent Infrastructure Platform}}, + year = {2022}, + howpublished = {\url{https://github.com/kyegomez/swarms}}, + note = {Documentation available at \url{https://docs.swarms.world}}, + version = {latest} +} +``` + +## Community + +Join our community to stay updated on the latest multi-agent research implementations: + +- **Discord**: [Join our community](https://discord.gg/jM3Z6M9uMq) + +- **Documentation**: [docs.swarms.world](https://docs.swarms.world) + +- **GitHub**: [kyegomez/swarms](https://github.com/kyegomez/swarms) + +- **Research Papers**: [awesome-multi-agent-papers](https://github.com/kyegomez/awesome-multi-agent-papers) + + diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 0f37bfed..0620b3e0 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -269,6 +269,7 @@ nav: - Examples: - Overview: "examples/index.md" - 
CookBook Index: "examples/cookbook_index.md" + - Paper Implementations: "examples/paper_implementations.md" - Basic Examples: - Individual Agents: - Basic Agent: "swarms/examples/basic_agent.md" diff --git a/hiearchical_marketing_team.py b/hiearchical_marketing_team.py new file mode 100644 index 00000000..6b492d07 --- /dev/null +++ b/hiearchical_marketing_team.py @@ -0,0 +1,347 @@ +from swarms import Agent, HierarchicalSwarm + +# ============================================================================= +# HEAD OF CONTENT AGENT +# ============================================================================= +head_of_content_agent = Agent( + agent_name="Head-of-Content", + agent_description="Senior content strategist responsible for content planning, creation, and editorial direction", + system_prompt="""You are the Head of Content for a dynamic marketing organization. You are responsible for: + + CONTENT STRATEGY & PLANNING: + - Developing comprehensive content strategies aligned with business objectives + - Creating editorial calendars and content roadmaps + - Identifying content gaps and opportunities across all channels + - Establishing content themes, messaging frameworks, and voice guidelines + - Planning content distribution strategies and channel optimization + + CONTENT CREATION & MANAGEMENT: + - Overseeing the creation of high-quality, engaging content across all formats + - Developing compelling narratives, storylines, and messaging hierarchies + - Ensuring content consistency, quality standards, and brand voice adherence + - Managing content workflows, approvals, and publishing schedules + - Creating content that drives engagement, conversions, and brand awareness + + EDITORIAL EXCELLENCE: + - Maintaining editorial standards and content quality across all touchpoints + - Developing content guidelines, style guides, and best practices + - Ensuring content is SEO-optimized, accessible, and user-friendly + - Creating content that resonates with target 
audiences and drives action + - Measuring content performance and optimizing based on data insights + + CROSS-FUNCTIONAL COLLABORATION: + - Working closely with SEO, creative, and brand teams to ensure content alignment + - Coordinating with marketing teams to support campaign objectives + - Ensuring content supports overall business goals and customer journey + - Providing content recommendations that drive measurable business outcomes + + Your expertise includes: + - Content marketing strategy and execution + - Editorial planning and content calendar management + - Storytelling and narrative development + - Content performance analysis and optimization + - Multi-channel content distribution + - Brand voice and messaging development + - Content ROI measurement and reporting + + You deliver strategic, data-driven content recommendations that drive engagement, conversions, and brand growth.""", + model_name="claude-3-sonnet-20240229", + max_loops=1, + temperature=0.7, + dynamic_temperature_enabled=True, + streaming_on=True, + print_on=True, +) + +# ============================================================================= +# AD CREATIVE DIRECTOR AGENT +# ============================================================================= +ad_creative_director_agent = Agent( + agent_name="Ad-Creative-Director", + agent_description="Creative visionary responsible for ad concept development, visual direction, and campaign creativity", + system_prompt="""You are the Ad Creative Director, the creative visionary responsible for developing compelling advertising concepts and campaigns. 
Your role encompasses: + + CREATIVE CONCEPT DEVELOPMENT: + - Creating breakthrough advertising concepts that capture attention and drive action + - Developing creative briefs, campaign concepts, and visual directions + - Crafting compelling headlines, copy, and messaging that resonate with audiences + - Designing creative strategies that differentiate brands and drive engagement + - Creating memorable, shareable content that builds brand awareness + + VISUAL DIRECTION & DESIGN: + - Establishing visual identity guidelines and creative standards + - Directing photography, videography, and graphic design elements + - Creating mood boards, style guides, and visual concepts + - Ensuring creative consistency across all advertising touchpoints + - Developing innovative visual approaches that stand out in crowded markets + + CAMPAIGN CREATIVITY: + - Designing integrated campaigns across multiple channels and formats + - Creating compelling storytelling that connects emotionally with audiences + - Developing creative executions for digital, print, video, and social media + - Ensuring creative excellence while meeting business objectives + - Creating campaigns that drive measurable results and brand growth + + BRAND CREATIVE STRATEGY: + - Aligning creative direction with brand positioning and values + - Developing creative approaches that build brand equity and recognition + - Creating distinctive visual and messaging elements that differentiate brands + - Ensuring creative consistency across all brand touchpoints + - Developing creative strategies that support long-term brand building + + Your expertise includes: + - Creative concept development and campaign ideation + - Visual direction and design strategy + - Copywriting and messaging development + - Campaign creative execution across all media + - Brand creative strategy and visual identity + - Creative performance optimization and testing + - Innovative advertising approaches and trends + + You deliver creative 
solutions that are both strategically sound and creatively brilliant, driving brand awareness, engagement, and conversions.""", + model_name="claude-3-sonnet-20240229", + max_loops=1, + temperature=0.8, + dynamic_temperature_enabled=True, + streaming_on=True, + print_on=True, +) + +# ============================================================================= +# SEO STRATEGIST AGENT +# ============================================================================= +seo_strategist_agent = Agent( + agent_name="SEO-Strategist", + agent_description="Technical SEO expert responsible for search optimization, keyword strategy, and organic growth", + system_prompt="""You are the SEO Strategist, the technical expert responsible for driving organic search visibility and traffic growth. Your comprehensive role includes: + + TECHNICAL SEO OPTIMIZATION: + - Conducting comprehensive technical SEO audits and implementing fixes + - Optimizing website architecture, site speed, and mobile responsiveness + - Managing XML sitemaps, robots.txt, and technical crawlability issues + - Implementing structured data markup and schema optimization + - Ensuring proper canonicalization, redirects, and URL structure + - Monitoring Core Web Vitals and technical performance metrics + + KEYWORD STRATEGY & RESEARCH: + - Conducting comprehensive keyword research and competitive analysis + - Developing keyword strategies aligned with business objectives + - Identifying high-value, low-competition keyword opportunities + - Creating keyword clusters and topic clusters for content planning + - Analyzing search intent and user behavior patterns + - Monitoring keyword performance and ranking fluctuations + + ON-PAGE SEO OPTIMIZATION: + - Optimizing page titles, meta descriptions, and header tags + - Creating SEO-optimized content that satisfies search intent + - Implementing internal linking strategies and site architecture + - Optimizing images, videos, and multimedia content for search + - Ensuring proper 
content structure and readability optimization + - Creating SEO-friendly URLs and navigation structures + + CONTENT SEO STRATEGY: + - Developing content strategies that target high-value keywords + - Creating SEO-optimized content briefs and guidelines + - Ensuring content satisfies search intent and user needs + - Implementing content optimization best practices + - Developing content clusters and topic authority building + - Creating content that drives organic traffic and conversions + + SEO ANALYTICS & REPORTING: + - Monitoring organic search performance and ranking metrics + - Analyzing search traffic patterns and user behavior + - Creating comprehensive SEO reports and recommendations + - Tracking competitor SEO strategies and performance + - Measuring SEO ROI and business impact + - Providing actionable insights for continuous optimization + + Your expertise includes: + - Technical SEO implementation and optimization + - Keyword research and competitive analysis + - On-page SEO and content optimization + - SEO analytics and performance measurement + - Local SEO and Google My Business optimization + - E-commerce SEO and product page optimization + - Voice search and featured snippet optimization + + You deliver data-driven SEO strategies that drive sustainable organic growth, improve search visibility, and generate qualified traffic that converts.""", + model_name="claude-3-sonnet-20240229", + max_loops=1, + temperature=0.6, + dynamic_temperature_enabled=True, + streaming_on=True, + print_on=True, +) + +# ============================================================================= +# BRAND STRATEGIST AGENT +# ============================================================================= +brand_strategist_agent = Agent( + agent_name="Brand-Strategist", + agent_description="Strategic brand expert responsible for brand positioning, identity development, and market differentiation", + system_prompt="""You are the Brand Strategist, the strategic expert responsible 
for developing and maintaining powerful brand positioning and market differentiation. Your comprehensive role includes: + + BRAND POSITIONING & STRATEGY: + - Developing compelling brand positioning statements and value propositions + - Creating brand strategies that differentiate in competitive markets + - Defining brand personality, voice, and character attributes + - Establishing brand pillars, messaging frameworks, and communication guidelines + - Creating brand positioning that resonates with target audiences + - Developing brand strategies that support business objectives and growth + + BRAND IDENTITY DEVELOPMENT: + - Creating comprehensive brand identity systems and guidelines + - Developing visual identity elements, logos, and brand assets + - Establishing brand color palettes, typography, and visual standards + - Creating brand style guides and identity manuals + - Ensuring brand consistency across all touchpoints and applications + - Developing brand identity that reflects positioning and values + + MARKET RESEARCH & INSIGHTS: + - Conducting comprehensive market research and competitive analysis + - Analyzing target audience segments and consumer behavior + - Identifying market opportunities and competitive advantages + - Researching industry trends and market dynamics + - Understanding customer needs, pain points, and motivations + - Providing insights that inform brand strategy and positioning + + BRAND MESSAGING & COMMUNICATION: + - Developing core brand messages and communication frameworks + - Creating brand storytelling and narrative development + - Establishing brand voice and tone guidelines + - Developing messaging hierarchies and communication strategies + - Creating brand messages that connect emotionally with audiences + - Ensuring consistent brand communication across all channels + + BRAND EXPERIENCE & TOUCHPOINTS: + - Designing comprehensive brand experience strategies + - Mapping customer journeys and brand touchpoints + - Creating brand 
experience guidelines and standards + - Ensuring brand consistency across all customer interactions + - Developing brand experience that builds loyalty and advocacy + - Creating memorable brand experiences that differentiate + + BRAND PERFORMANCE & MEASUREMENT: + - Establishing brand performance metrics and KPIs + - Measuring brand awareness, perception, and equity + - Tracking brand performance against competitors + - Analyzing brand sentiment and customer feedback + - Providing brand performance insights and recommendations + - Ensuring brand strategies drive measurable business outcomes + + Your expertise includes: + - Brand positioning and strategy development + - Brand identity and visual system design + - Market research and competitive analysis + - Brand messaging and communication strategy + - Brand experience design and optimization + - Brand performance measurement and analytics + - Brand architecture and portfolio management + + You deliver strategic brand solutions that create powerful market differentiation, build strong brand equity, and drive sustainable business growth through compelling brand positioning and experiences.""", + model_name="claude-3-sonnet-20240229", + max_loops=1, + temperature=0.7, + dynamic_temperature_enabled=True, + streaming_on=True, + print_on=True, +) + +# ============================================================================= +# MARKETING DIRECTOR AGENT (COORDINATOR) +# ============================================================================= +marketing_director_agent = Agent( + agent_name="Marketing-Director", + agent_description="Senior marketing director who orchestrates comprehensive marketing strategies across all specialized teams", + system_prompt="""You are the Marketing Director, the senior executive responsible for orchestrating comprehensive marketing strategies and coordinating a team of specialized marketing experts. 
Your role is to: + + STRATEGIC COORDINATION: + - Analyze complex marketing challenges and break them down into specialized tasks + - Assign tasks to the most appropriate specialist based on their unique expertise + - Ensure comprehensive coverage of all marketing dimensions (content, creative, SEO, brand) + - Coordinate between specialists to avoid duplication and ensure synergy + - Synthesize findings from multiple specialists into coherent marketing strategies + - Ensure all marketing efforts align with business objectives and target audience needs + + TEAM LEADERSHIP: + - Lead the Head of Content in developing content strategies and editorial direction + - Guide the Ad Creative Director in creating compelling campaigns and visual concepts + - Direct the SEO Strategist in optimizing search visibility and organic growth + - Oversee the Brand Strategist in developing brand positioning and market differentiation + - Ensure all team members work collaboratively toward unified marketing goals + - Provide strategic direction and feedback to optimize team performance + + INTEGRATED MARKETING STRATEGY: + - Develop integrated marketing campaigns that leverage all specialist expertise + - Ensure content, creative, SEO, and brand strategies work together seamlessly + - Create marketing roadmaps that coordinate efforts across all channels + - Balance short-term campaign needs with long-term brand building + - Ensure marketing strategies drive measurable business outcomes + - Optimize marketing mix and budget allocation across all activities + + PERFORMANCE OPTIMIZATION: + - Monitor marketing performance across all channels and activities + - Analyze data to identify optimization opportunities and strategic adjustments + - Ensure marketing efforts deliver ROI and support business growth + - Provide strategic recommendations based on performance insights + - Coordinate testing and optimization efforts across all marketing functions + - Ensure continuous improvement and 
innovation in marketing approaches + + Your expertise includes: + - Integrated marketing strategy and campaign development + - Team leadership and cross-functional coordination + - Marketing performance analysis and optimization + - Strategic planning and business alignment + - Budget management and resource allocation + - Stakeholder communication and executive reporting + + You deliver comprehensive marketing strategies that leverage the full expertise of your specialized team, ensuring all marketing efforts work together to drive business growth, brand awareness, and customer acquisition.""", + model_name="claude-3-sonnet-20240229", + max_loops=1, + temperature=0.7, + dynamic_temperature_enabled=True, + streaming_on=True, + print_on=True, +) + +# ============================================================================= +# HIERARCHICAL MARKETING SWARM +# ============================================================================= +# Create list of specialized marketing agents +marketing_agents = [ + head_of_content_agent, + ad_creative_director_agent, + seo_strategist_agent, + brand_strategist_agent, +] + +# Initialize the hierarchical marketing swarm +marketing_swarm = HierarchicalSwarm( + name="Hierarchical-Marketing-Swarm", + description="A comprehensive marketing team with specialized agents for content, creative, SEO, and brand strategy, coordinated by a marketing director", + director=marketing_director_agent, + agents=marketing_agents, + max_loops=2, + verbose=True, +) + +# ============================================================================= +# EXAMPLE USAGE +# ============================================================================= +if __name__ == "__main__": + # Example marketing challenge + task = """Develop a comprehensive marketing strategy for a new SaaS product launch. + The product is a project management tool targeting small to medium businesses. + Please coordinate the team to create: + 1. 
Content strategy and editorial plan + 2. Creative campaign concepts and visual direction + 3. SEO strategy for organic growth + 4. Brand positioning and market differentiation + + Ensure all elements work together cohesively to drive awareness, engagement, and conversions.""" + + result = marketing_swarm.run(task=task) + print("=" * 80) + print("MARKETING SWARM RESULTS") + print("=" * 80) + print(result) diff --git a/swarms/structs/__init__.py b/swarms/structs/__init__.py index e40d22ce..b3871479 100644 --- a/swarms/structs/__init__.py +++ b/swarms/structs/__init__.py @@ -91,6 +91,8 @@ from swarms.structs.interactive_groupchat import ( random_dynamic_speaker, ) +from swarms.structs.hiearchical_swarm import HierarchicalSwarm + __all__ = [ "Agent", "BaseStructure", @@ -166,4 +168,5 @@ __all__ = [ "random_speaker", "priority_speaker", "random_dynamic_speaker", + "HierarchicalSwarm", ] diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py index ed4c9de7..5275f25d 100644 --- a/swarms/structs/agent.py +++ b/swarms/structs/agent.py @@ -2008,7 +2008,7 @@ class Agent: for doc in docs: data = data_to_text(doc) all_data.append(f"Document: {doc}\n{data}") - + # Combine all document content combined_data = "\n\n".join(all_data) diff --git a/swarms/structs/hiearchical_swarm.py b/swarms/structs/hiearchical_swarm.py index 7c5f37a8..84662839 100644 --- a/swarms/structs/hiearchical_swarm.py +++ b/swarms/structs/hiearchical_swarm.py @@ -11,7 +11,7 @@ Flow: """ import traceback -from typing import Any, List, Optional, Union, Callable +from typing import Any, List, Literal, Optional, Union, Callable from pydantic import BaseModel, Field @@ -56,6 +56,41 @@ class SwarmSpec(BaseModel): ) +SwarmType = Literal[ + "AgentRearrange", + "MixtureOfAgents", + "SpreadSheetSwarm", + "SequentialWorkflow", + "ConcurrentWorkflow", + "GroupChat", + "MultiAgentRouter", + "AutoSwarmBuilder", + "HiearchicalSwarm", + "auto", + "MajorityVoting", + "MALT", + "DeepResearchSwarm", + "CouncilAsAJudge", 
+ "InteractiveGroupChat", +] + + +class SwarmRouterCall(BaseModel): + goal: str = Field( + ..., + description="The goal of the swarm router call. This is the goal that the swarm router will use to determine the best swarm to use.", + ) + swarm_type: SwarmType = Field( + ..., + description="The type of swarm to use. This is the type of swarm that the swarm router will use to determine the best swarm to use.", + ) + + task: str = Field( + ..., + description="The task to be executed by the swarm router. This is the task that the swarm router will use to determine the best swarm to use.", + ) + + class HierarchicalSwarm(BaseSwarm): """ _Representer a hierarchical swarm of agents, with a director that orchestrates tasks among the agents. diff --git a/swarms/utils/litellm_wrapper.py b/swarms/utils/litellm_wrapper.py index 52550800..aaa3f71e 100644 --- a/swarms/utils/litellm_wrapper.py +++ b/swarms/utils/litellm_wrapper.py @@ -236,9 +236,9 @@ class LiteLLM: # Extract mime type from the data URI or use default mime_type = "image/jpeg" # default if "data:" in image_url and ";base64," in image_url: - mime_type = image_url.split(";base64,")[0].split("data:")[ - 1 - ] + mime_type = image_url.split(";base64,")[0].split( + "data:" + )[1] # Ensure mime type is one of the supported formats supported_formats = [ @@ -298,7 +298,9 @@ class LiteLLM: # Add format for specific models extension = Path(image).suffix.lower() mime_type = ( - f"image/{extension[1:]}" if extension else "image/jpeg" + f"image/{extension[1:]}" + if extension + else "image/jpeg" ) vision_message["image_url"]["format"] = mime_type @@ -318,27 +320,41 @@ class LiteLLM: def _should_use_direct_url(self, image: str) -> bool: """ Determine if we should use direct URL passing instead of base64 conversion. 
- + Args: image (str): The image source (URL or file path) - + Returns: bool: True if we should use direct URL, False if we need base64 conversion """ # Only use direct URL for HTTP/HTTPS URLs if not image.startswith(("http://", "https://")): return False - + # Check for local/custom models that might not support direct URLs model_lower = self.model_name.lower() - local_indicators = ["localhost", "127.0.0.1", "local", "custom", "ollama", "llama-cpp"] - - is_local = any(indicator in model_lower for indicator in local_indicators) or \ - (self.base_url is not None and any(indicator in self.base_url.lower() for indicator in local_indicators)) - + local_indicators = [ + "localhost", + "127.0.0.1", + "local", + "custom", + "ollama", + "llama-cpp", + ] + + is_local = any( + indicator in model_lower for indicator in local_indicators + ) or ( + self.base_url is not None + and any( + indicator in self.base_url.lower() + for indicator in local_indicators + ) + ) + if is_local: return False - + # Use LiteLLM's supports_vision to check if model supports vision and direct URLs try: return supports_vision(model=self.model_name) @@ -351,22 +367,26 @@ class LiteLLM: """ Process the image for the given task. Handles different image formats and model requirements. - + This method now intelligently chooses between: 1. Direct URL passing (when model supports it and image is a URL) 2. Base64 conversion (for local files or unsupported models) - + This approach reduces server load and improves performance by avoiding unnecessary image downloads and base64 conversions when possible. """ logger.info(f"Processing image for model: {self.model_name}") - + # Log whether we're using direct URL or base64 conversion if self._should_use_direct_url(image): - logger.info(f"Using direct URL passing for image: {image[:100]}...") + logger.info( + f"Using direct URL passing for image: {image[:100]}..." 
+ ) else: if image.startswith(("http://", "https://")): - logger.info("Converting URL image to base64 (model doesn't support direct URLs)") + logger.info( + "Converting URL image to base64 (model doesn't support direct URLs)" + ) else: logger.info("Converting local file to base64") @@ -414,13 +434,13 @@ class LiteLLM: def check_if_model_supports_vision(self, img: str = None): """ Check if the model supports vision capabilities. - + This method uses LiteLLM's built-in supports_vision function to verify that the model can handle image inputs before processing. - + Args: img (str, optional): Image path/URL to validate against model capabilities - + Raises: ValueError: If the model doesn't support vision and an image is provided """ diff --git a/tests/utils/test_litellm_wrapper.py b/tests/utils/test_litellm_wrapper.py index 3a657bae..a0a740f2 100644 --- a/tests/utils/test_litellm_wrapper.py +++ b/tests/utils/test_litellm_wrapper.py @@ -228,41 +228,49 @@ def run_test_suite(): llm = LiteLLM(model_name="gpt-4o") # Mock image URL to test message structure test_img = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true" - messages = llm._prepare_messages("Describe this image", img=test_img) + messages = llm._prepare_messages( + "Describe this image", img=test_img + ) assert isinstance(messages, list) assert len(messages) >= 1 # Check if image content is properly structured - user_message = next((msg for msg in messages if msg["role"] == "user"), None) + user_message = next( + (msg for msg in messages if msg["role"] == "user"), None + ) assert user_message is not None log_test_result("Message Preparation with Image", True) except Exception as e: - log_test_result("Message Preparation with Image", False, str(e)) + log_test_result( + "Message Preparation with Image", False, str(e) + ) # Test 11: Vision Processing Methods try: logger.info("Testing vision processing methods") llm = LiteLLM(model_name="gpt-4o") messages = [] - + # Test OpenAI vision 
processing processed_messages = llm.openai_vision_processing( - "Describe this image", - "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true", - messages.copy() + "Describe this image", + "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true", + messages.copy(), ) assert isinstance(processed_messages, list) assert len(processed_messages) > 0 - + # Test Anthropic vision processing - llm_anthropic = LiteLLM(model_name="claude-3-5-sonnet-20241022") + llm_anthropic = LiteLLM( + model_name="claude-3-5-sonnet-20241022" + ) processed_messages_anthropic = llm_anthropic.anthropic_vision_processing( - "Describe this image", - "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true", - messages.copy() + "Describe this image", + "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true", + messages.copy(), ) assert isinstance(processed_messages_anthropic, list) assert len(processed_messages_anthropic) > 0 - + log_test_result("Vision Processing Methods", True) except Exception as e: log_test_result("Vision Processing Methods", False, str(e)) @@ -271,20 +279,22 @@ def run_test_suite(): try: logger.info("Testing local vs URL detection") llm = LiteLLM(model_name="gpt-4o") - + # Test URL detection url_test = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true" is_url_direct = llm._should_use_direct_url(url_test) - + # Test local file detection local_test = "/path/to/local/image.jpg" is_local_direct = llm._should_use_direct_url(local_test) - + # URLs should potentially use direct, local files should not assert isinstance(is_url_direct, bool) assert isinstance(is_local_direct, bool) - assert is_local_direct == False # Local files should never use direct URL - + assert ( + is_local_direct == False + ) # Local files should never use direct URL + log_test_result("Local vs URL Detection", True) except Exception as e: log_test_result("Local vs URL Detection", False, str(e)) @@ 
-294,22 +304,22 @@ def run_test_suite(): logger.info("Testing vision message structure") llm = LiteLLM(model_name="gpt-4o") messages = [] - + # Test message structure for image input result = llm.vision_processing( task="What do you see?", image="https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true", - messages=messages + messages=messages, ) - + assert isinstance(result, list) assert len(result) > 0 - + # Verify the message contains both text and image components user_msg = result[-1] # Last message should be user message assert user_msg["role"] == "user" assert "content" in user_msg - + log_test_result("Vision Message Structure", True) except Exception as e: log_test_result("Vision Message Structure", False, str(e))