diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 00000000..72ccc40a --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,12 @@ +# this is a config file for the github action labeler + +# Add 'label1' to any changes within 'example' folder or any subfolders +example_change: +- example/** + +# Add 'label2' to any file changes within 'example2' folder +example2_change: example2/* + +# Add label3 to any change to .txt files within the entire repository. Quotation marks are required for the leading asterisk +text_files: +- '**/*.txt' \ No newline at end of file diff --git a/.github/workflows/welcome.yml b/.github/workflows/welcome.yml index eadc0b68..25edc27c 100644 --- a/.github/workflows/welcome.yml +++ b/.github/workflows/welcome.yml @@ -9,6 +9,7 @@ on: jobs: build: name: 👋 Welcome + permissions: write-all runs-on: ubuntu-latest steps: - uses: actions/first-interaction@v1.2.0 diff --git a/.gitignore b/.gitignore index a336e116..767abb9d 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ error.txt # C extensions *.so +.ruff_cache errors.txt diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..afbec392 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +kye@apac.ai. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bd9090de..f57d5485 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -100,6 +100,35 @@ You can learn more about mkdocs on the [mkdocs website](https://www.mkdocs.org/) - Run all the tests in the tests folder `find ./tests -name '*.py' -exec pytest {} \;` +## Code Quality +`quality.sh` runs 4 different code formatters for ultra reliable code cleanup using Autopep8, Black, Ruff, YAPF +1. Open your terminal. + +2. 
Change directory to where `code_quality.sh` is located using the `cd` command:
+   ```sh
+   cd /path/to/directory
+   ```
+
+3. Make sure the script has execute permissions:
+   ```sh
+   chmod +x code_quality.sh
+   ```
+
+4. Run the script:
+   ```sh
+   ./code_quality.sh
+   ```
+
+If the script requires administrative privileges, you might need to run it with `sudo`:
+```sh
+sudo ./code_quality.sh
+```
+
+Please replace `/path/to/directory` with the actual path where the `code_quality.sh` script is located on your system.
+
+To change what the script runs (for example, which directories YAPF or the other formatters are applied to), edit `code_quality.sh` and adjust the commands it contains.
+
+
## 📄 license
-By contributing, you agree that your contributions will be licensed under an [MIT license](https://github.com/kyegomez/swarms/blob/develop/LICENSE.md).
\ No newline at end of file
+By contributing, you agree that your contributions will be licensed under an [MIT license](https://github.com/kyegomez/swarms/blob/develop/LICENSE.md).
diff --git a/Dockerfile b/Dockerfile
index ee212c8c..435595b1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,9 @@
FROM python:3.8-slim-buster

-WORKDIR /home/zack/code/swarms/*
+WORKDIR /usr/src/app

-ADD . /home/zack/code/swarms/*
+ADD . .

RUN pip install --no-cache-dir -r requirements.txt

EXPOSE 8000
-
diff --git a/README.md b/README.md
index 68d7ba05..abc6ab69 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,10 @@ Book a [1-on-1 Session with Kye](https://calendly.com/swarm-corp/30min), the Cre
We have a small gallery of examples to run here, [for more check out the docs to build your own agent and or swarms!](https://docs.apac.ai)

### `Flow` Example
-- The `Flow` is a superior iteratioin of the `LLMChain` from Langchain, our intent with `Flow` is to create the most reliable loop structure that gives the agents their "autonomy" through 3 main methods of interaction, one through user specified loops, then dynamic where the agent parses a token, and or an interactive human input verison, or a mix of all 3.
+- Reliable structure that provides LLMs with autonomy
+- Extremely customizable with stopping conditions, interactivity, dynamic temperature, loop intervals, and so much more
+- Enterprise-grade and production-ready: `Flow` is designed and optimized for automating real-world tasks at scale!
+

```python
from swarms.models import OpenAIChat
from swarms.structs import Flow

api_key = ""

-
-# Initialize the language model,
-# This model can be swapped out with Anthropic, ETC, Huggingface Models like Mistral, ETC
+# Initialize the language model; this model can be swapped out for Anthropic, Hugging Face models like Mistral, etc.
llm = OpenAIChat(
+    # model_name="gpt-4"
    openai_api_key=api_key,
    temperature=0.5,
+    # max_tokens=100,
)

-# Initialize the flow
+## Initialize the workflow
flow = Flow(
    llm=llm,
-    max_loops=5,
+    max_loops=2,
+    dashboard=True,
+    # stopping_condition=None, # You can define a stopping condition as needed.
+    # loop_interval=1,
+    # retry_attempts=3,
+    # retry_interval=1,
+    # interactive=False, # Set to 'True' for interactive mode.
+    # dynamic_temperature=False, # Set to 'True' for dynamic temperature handling.
) -out = flow.run("Generate a 10,000 word blog, say Stop when done") -print(out) +# out = flow.load_state("flow_state.json") +# temp = flow.dynamic_temperature() +# filter = flow.add_response_filter("Trump") +out = flow.run("Generate a 10,000 word blog on health and wellness.") +# out = flow.validate_response(out) +# out = flow.analyze_feedback(out) +# out = flow.print_history_and_memory() +# # out = flow.save_state("flow_state.json") +# print(out) + ``` +------ -## `GodMode` -- A powerful tool for concurrent execution of tasks using multiple Language Model (LLM) instances. +### `SequentialWorkflow` +- A Sequential swarm of autonomous agents where each agent's outputs are fed into the next agent +- Save and Restore Workflow states! +- Integrate Flow's with various LLMs and Multi-Modality Models ```python -from swarms.swarms import GodMode from swarms.models import OpenAIChat +from swarms.structs import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow -api_key = "" +# Example usage +api_key = ( + "" # Your actual API key here +) +# Initialize the language flow llm = OpenAIChat( - openai_api_key=api_key + openai_api_key=api_key, + temperature=0.5, + max_tokens=3000, ) +# Initialize the Flow with the language flow +flow1 = Flow(llm=llm, max_loops=1, dashboard=False) -llms = [ - llm, - llm, - llm -] +# Create another Flow for a different task +flow2 = Flow(llm=llm, max_loops=1, dashboard=False) -god_mode = GodMode(llms) - -task = 'Generate a 10,000 word blog on health and wellness.' - -out = god_mode.run(task) -god_mode.print_responses(task) -``` - ------- - -### `OmniModalAgent` -- OmniModal Agent is an LLM that access to 10+ multi-modal encoders and diffusers! It can generate images, videos, speech, music and so much more, get started with: - -```python -from swarms.models import OpenAIChat -from swarms.agents import OmniModalAgent +# Create the workflow +workflow = SequentialWorkflow(max_loops=1) -api_key = "SK-" +# Add tasks to the workflow +workflow.add("Generate a 10,000 word blog on health and wellness.", flow1) -llm = OpenAIChat(model_name="gpt-4", openai_api_key=api_key) +# Suppose the next task takes the output of the first task as input +workflow.add("Summarize the generated blog", flow2) -agent = OmniModalAgent(llm) +# Run the workflow +workflow.run() -agent.run("Create a video of a swarm of fish") +# Output the results +for task in workflow.tasks: + print(f"Task: {task.description}, Result: {task.result}") ``` @@ -122,8 +140,10 @@ agent.run("Create a video of a swarm of fish") ## Contribute +- We're always looking for contributors to help us improve and expand this project. If you're interested, please check out our [Contributing Guidelines](CONTRIBUTING.md) and our [contributing board](https://github.com/users/kyegomez/projects/1) -We're always looking for contributors to help us improve and expand this project. 
If you're interested, please check out our [Contributing Guidelines](CONTRIBUTING.md) and our [contributing board](https://github.com/users/kyegomez/projects/1) +## Community +- [Join the Swarms community here on Discord!](https://discord.gg/AJazBmhKnr) # License diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..2de3c275 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,32 @@ +# Security Policy +=============== + +## Supported Versions +------------------ + +* * * * * + +| Version | Supported | +| --- | --- | +| 2.0.5 | :white_check_mark: | +| 2.0.4 | :white_check_mark: | +| 2.0.3 | :white_check_mark: | +| 2.0.2 | :white_check_mark: | +| 2.0.1 | :white_check_mark: | +| 2.0.0 | :white_check_mark: | + +# Reporting a Vulnerability +------------------------- + +* * * * * + +If you discover a security vulnerability in any of the above versions, please report it immediately to our security team by sending an email to kye@apac.ai. We take security vulnerabilities seriously and appreciate your efforts in disclosing them responsibly. + +Please provide detailed information on the vulnerability, including steps to reproduce, potential impact, and any known mitigations. Our security team will acknowledge receipt of your report within 24 hours and will provide regular updates on the progress of the investigation. + +Once the vulnerability has been thoroughly assessed, we will take the necessary steps to address it. This may include releasing a security patch, issuing a security advisory, or implementing other appropriate mitigations. + +We aim to respond to all vulnerability reports in a timely manner and work towards resolving them as quickly as possible. We thank you for your contribution to the security of our software. + +Please note that any vulnerability reports that are not related to the specified versions or do not provide sufficient information may be declined. + diff --git a/code_quality.sh b/code_quality.sh new file mode 100755 index 00000000..032085ca --- /dev/null +++ b/code_quality.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Navigate to the directory containing the 'swarms' folder +# cd /path/to/your/code/directory + +# Run autopep8 with max aggressiveness (-aaa) and in-place modification (-i) +# on all Python files (*.py) under the 'swarms' directory. +autopep8 --in-place --aggressive --aggressive --recursive --experimental --list-fixes swarms/ + +# Run black with default settings, since black does not have an aggressiveness level. +# Black will format all Python files it finds in the 'swarms' directory. +black --experimental-string-processing swarms/ + +# Run ruff on the 'swarms' directory. +# Add any additional flags if needed according to your version of ruff. +ruff swarms/ + +# YAPF +yapf --recursive --in-place --verbose --style=google --parallel swarms diff --git a/demos/accountant_team/accountant_team.py b/demos/accountant_team/accountant_team.py new file mode 100644 index 00000000..7eadec96 --- /dev/null +++ b/demos/accountant_team/accountant_team.py @@ -0,0 +1,35 @@ +import re +from swarms.models.nougat import Nougat +from swarms.structs import Flow +from swarms.models import OpenAIChat +from swarms.models import LayoutLMDocumentQA + +# # URL of the image of the financial document +IMAGE_OF_FINANCIAL_DOC_URL = "bank_statement_2.jpg" + +# Example usage +api_key = "" + +# Initialize the language flow +llm = OpenAIChat( + openai_api_key=api_key, +) + +# LayoutLM Document QA +pdf_analyzer = LayoutLMDocumentQA() + +question = "What is the total amount of expenses?" 
+answer = pdf_analyzer( + question, + IMAGE_OF_FINANCIAL_DOC_URL, +) + +# Initialize the Flow with the language flow +agent = Flow(llm=llm) +SUMMARY_AGENT_PROMPT = f""" +Generate an actionable summary of this financial document be very specific and precise, provide bulletpoints be very specific provide methods of lowering expenses: {answer}" +""" + +# Add tasks to the workflow +summary_agent = agent.run(SUMMARY_AGENT_PROMPT) +print(summary_agent) diff --git a/demos/accountant_team/bank_statement_2.jpg b/demos/accountant_team/bank_statement_2.jpg new file mode 100644 index 00000000..dbc8a4e9 Binary files /dev/null and b/demos/accountant_team/bank_statement_2.jpg differ diff --git a/demos/autotemp.py b/demos/autotemp.py new file mode 100644 index 00000000..dcde42d3 --- /dev/null +++ b/demos/autotemp.py @@ -0,0 +1,101 @@ +import re +from concurrent.futures import ThreadPoolExecutor, as_completed +from swarms.models import OpenAIChat + + +class AutoTempAgent: + """ + AutoTemp is a tool for automatically selecting the best temperature setting for a given task. + + Flow: + 1. Generate outputs at a range of temperature settings. + 2. Evaluate each output using the default temperature setting. + 3. Select the best output based on the evaluation score. + 4. Return the best output. + + + Args: + temperature (float, optional): The default temperature setting to use. Defaults to 0.5. + api_key (str, optional): Your OpenAI API key. Defaults to None. + alt_temps ([type], optional): A list of alternative temperature settings to try. Defaults to None. + auto_select (bool, optional): If True, the best temperature setting will be automatically selected. Defaults to True. + max_workers (int, optional): The maximum number of workers to use when generating outputs. Defaults to 6. + + Returns: + [type]: [description] + + Examples: + >>> from swarms.demos.autotemp import AutoTemp + >>> autotemp = AutoTemp() + >>> autotemp.run("Generate a 10,000 word blog on mental clarity and the benefits of meditation.", "0.4,0.6,0.8,1.0,1.2,1.4") + Best AutoTemp Output (Temp 0.4 | Score: 100.0): + Generate a 10,000 word blog on mental clarity and the benefits of meditation. + + """ + + def __init__( + self, + temperature: float = 0.5, + api_key: str = None, + alt_temps=None, + auto_select=True, + max_workers=6, + ): + self.alt_temps = alt_temps if alt_temps else [0.4, 0.6, 0.8, 1.0, 1.2, 1.4] + self.auto_select = auto_select + self.max_workers = max_workers + self.temperature = temperature + self.alt_temps = alt_temps + self.llm = OpenAIChat( + openai_api_key=api_key, + temperature=temperature, + ) + + def evaluate_output(self, output: str): + """Evaluate the output using the default temperature setting.""" + eval_prompt = f""" + Evaluate the following output which was generated at a temperature setting of {self.temperature}. + Provide a precise score from 0.0 to 100.0, considering the criteria of relevance, clarity, utility, pride, and delight. 
+ + Output to evaluate: + --- + {output} + --- + """ + score_text = self.llm(prompt=eval_prompt) + score_match = re.search(r"\b\d+(\.\d)?\b", score_text) + return round(float(score_match.group()), 1) if score_match else 0.0 + + def run(self, task: str, temperature_string): + """Run the AutoTemp agent.""" + temperature_list = [ + float(temp.strip()) for temp in temperature_string.split(",") + ] + outputs = {} + scores = {} + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_temp = { + executor.submit(self.llm.generate, task, temp): temp + for temp in temperature_list + } + for future in as_completed(future_to_temp): + temp = future_to_temp[future] + output_text = future.result() + outputs[temp] = output_text + scores[temp] = self.evaluate_output(output_text, temp) + + if not scores: + return "No valid outputs generated.", None + + sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True) + best_temp, best_score = sorted_scores[0] + best_output = outputs[best_temp] + + return ( + f"Best AutoTemp Output (Temp {best_temp} | Score: {best_score}):\n{best_output}" + if self.auto_select + else "\n".join( + f"Temp {temp} | Score: {score}:\n{outputs[temp]}" + for temp, score in sorted_scores + ) + ) diff --git a/demos/multi_modal_auto_agent.py b/demos/multi_modal_auto_agent.py new file mode 100644 index 00000000..b462795f --- /dev/null +++ b/demos/multi_modal_auto_agent.py @@ -0,0 +1,30 @@ +from swarms.structs import Flow +from swarms.models import Idefics + +# Multi Modality Auto Agent +llm = Idefics(max_length=2000) + +task = "User: What is in this image? https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG" + +## Initialize the workflow +flow = Flow( + llm=llm, + max_loops=2, + dashboard=True, + # stopping_condition=None, # You can define a stopping condition as needed. + # loop_interval=1, + # retry_attempts=3, + # retry_interval=1, + # interactive=False, # Set to 'True' for interactive mode. + # dynamic_temperature=False, # Set to 'True' for dynamic temperature handling. 
+) + +# out = flow.load_state("flow_state.json") +# temp = flow.dynamic_temperature() +# filter = flow.add_response_filter("Trump") +out = flow.run(task) +# out = flow.validate_response(out) +# out = flow.analyze_feedback(out) +# out = flow.print_history_and_memory() +# # out = flow.save_state("flow_state.json") +# print(out) diff --git a/demos/positive_med.py b/demos/positive_med.py index e8f879c9..88226545 100644 --- a/demos/positive_med.py +++ b/demos/positive_med.py @@ -23,7 +23,7 @@ Distribution Agent: """ -from swarms import OpenAIChat +from swarms.models import OpenAIChat from termcolor import colored TOPIC_GENERATOR = f""" diff --git a/demos/ui_software_demo.py b/demos/ui_software_demo.py new file mode 100644 index 00000000..d322f71b --- /dev/null +++ b/demos/ui_software_demo.py @@ -0,0 +1,5 @@ +""" +Autonomous swarm that optimizes UI autonomously + +GPT4Vision ->> GPT4 ->> UI +""" diff --git a/docs/architecture.md b/docs/corporate/architecture.md similarity index 100% rename from docs/architecture.md rename to docs/corporate/architecture.md diff --git a/docs/bounties.md b/docs/corporate/bounties.md similarity index 100% rename from docs/bounties.md rename to docs/corporate/bounties.md diff --git a/docs/checklist.md b/docs/corporate/checklist.md similarity index 100% rename from docs/checklist.md rename to docs/corporate/checklist.md diff --git a/docs/cost_analysis.md b/docs/corporate/cost_analysis.md similarity index 100% rename from docs/cost_analysis.md rename to docs/corporate/cost_analysis.md diff --git a/docs/demos.md b/docs/corporate/demos.md similarity index 100% rename from docs/demos.md rename to docs/corporate/demos.md diff --git a/docs/design.md b/docs/corporate/design.md similarity index 100% rename from docs/design.md rename to docs/corporate/design.md diff --git a/docs/distribution.md b/docs/corporate/distribution.md similarity index 100% rename from docs/distribution.md rename to docs/corporate/distribution.md diff --git a/docs/failures.md b/docs/corporate/failures.md similarity index 100% rename from docs/failures.md rename to docs/corporate/failures.md diff --git a/docs/faq.md b/docs/corporate/faq.md similarity index 100% rename from docs/faq.md rename to docs/corporate/faq.md diff --git a/docs/flywheel.md b/docs/corporate/flywheel.md similarity index 100% rename from docs/flywheel.md rename to docs/corporate/flywheel.md diff --git a/docs/hiring.md b/docs/corporate/hiring.md similarity index 100% rename from docs/hiring.md rename to docs/corporate/hiring.md diff --git a/docs/metric.md b/docs/corporate/metric.md similarity index 100% rename from docs/metric.md rename to docs/corporate/metric.md diff --git a/docs/purpose.md b/docs/corporate/purpose.md similarity index 100% rename from docs/purpose.md rename to docs/corporate/purpose.md diff --git a/docs/research.md b/docs/corporate/research.md similarity index 100% rename from docs/research.md rename to docs/corporate/research.md diff --git a/docs/roadmap.md b/docs/corporate/roadmap.md similarity index 100% rename from docs/roadmap.md rename to docs/corporate/roadmap.md diff --git a/docs/examples/ideas.md b/docs/examples/ideas.md new file mode 100644 index 00000000..a0a9c9b7 --- /dev/null +++ b/docs/examples/ideas.md @@ -0,0 +1,63 @@ +# 2O+ Autonomous Agent Blogs + +1. **The Ultimate Guide to Deploying Production-Ready Autonomous Agents with Swarms** + - A comprehensive start-to-finish guide on implementing Swarms in a production environment. + +2. 
**5 Steps to Elevate Your AI with Swarms Multi-Modal Autonomous Agents** + - A walkthrough highlighting the simplicity of Swarms’ setup and deployment for various AI applications. + +3. **Integrating Swarms Into Your Enterprise Workflow: A Step-By-Step Tutorial** + - A practical guide focusing on integrating Swarms into existing enterprise systems. + +4. **Swarms’ Flow: Streamlining AI Deployment in Your Business** + - Exploring the benefits and technicalities of using the Flow feature to simplify complex AI workflows. + +5. **From Zero to Hero: Building Your First Enterprise-Grade AI Agent with Swarms** + - A beginner-friendly walkthrough for building and deploying an AI agent using Swarms. + +6. **Scaling AI with Swarms: Managing Multi-Agent Systems Efficiently** + - Strategies and best practices for scaling multi-agent systems in enterprise settings. + +7. **Creating Resilient AI Systems with Swarms' Autonomous Agents** + - Discussing the robustness of Swarms agents and how they maintain performance under stress. + +8. **Unlocking New Capabilities: Advanced Features of Swarms for AI Engineers** + - Diving into the more sophisticated features of Swarms and how they can be leveraged in complex projects. + +9. **Swarms’ Quick Wins: Implementing AI Agents in Less Than 5 Lines of Code** + - A focused guide on rapidly deploying functional AI agents with minimal coding. + +10. **Benchmarking Your AI: Performance Metrics with Swarms** + - How to use Swarms to measure and optimize the performance of AI agents. + +11. **Swarms Case Studies: Real-World Success Stories from AI Engineers** + - Sharing stories and testimonials of how various organizations successfully implemented Swarms. + +12. **Effortless Multi-Modal Model Deployment: A Swarms Walkthrough** + - Explaining how to use Swarms to deploy multi-modal models with ease. + +13. **Future-Proof Your AI: Adapting to New Tech with Swarms** + - How Swarms' flexible architecture allows for easy updates and adaptation to new AI technologies. + +14. **Enterprise AI Security: Ensuring Your Swarms Agents are Hack-Proof** + - Best practices for securing autonomous agents in enterprise applications. + +15. **Migrating to Swarms: Transitioning From Legacy Systems** + - A guide for AI engineers on migrating existing AI systems to Swarms without downtime. + +16. **Multi-Agent Collaboration: How Swarms Facilitates Teamwork Among AI** + - An insight into how Swarms allows for multiple AI agents to work together seamlessly. + +17. **The Engineer's Toolkit: Swarms' Features Every AI Developer Must Know** + - Highlighting the most useful tools and features of Swarms from an AI developer’s perspective. + +18. **Swarms for Different Industries: Customizing AI Agents for Niche Markets** + - Exploring how Swarms can be tailored to fit the needs of various industries such as healthcare, finance, and retail. + +19. **Building Intelligent Workflows with Swarms’ Flow** + - A tutorial on using the Flow feature to create intelligent, responsive AI-driven workflows. + +20. **Troubleshooting Common Issues When Deploying Swarms Autonomous Agents** + - A problem-solving guide for AI engineers on overcoming common challenges when implementing Swarms agents. + +Each blog or walkthrough can be structured to not only showcase the functionality and benefits of the Swarms framework but also to establish the brand as a thought leader in the space of enterprise AI solutions. 
\ No newline at end of file diff --git a/docs/examples/reliable_autonomous_agents.md b/docs/examples/reliable_autonomous_agents.md new file mode 100644 index 00000000..f2988075 --- /dev/null +++ b/docs/examples/reliable_autonomous_agents.md @@ -0,0 +1,239 @@ +# Enterprise-Grade Workflow Automation With Autonomous Agents +======================================================================== + +Welcome to this comprehensive walkthrough guide tutorial on the SequentialWorkflow feature of the Swarms Framework! In this tutorial, we will explore the purpose, usage, and key concepts of the SequentialWorkflow class, which is a part of the swarms package. Whether you are a beginner, intermediate, or expert developer, this tutorial will provide you with a clear understanding of how to effectively use the SequentialWorkflow class in your projects. + +AI engineering is a dynamic and evolving field that involves the development and deployment of intelligent systems and applications. In this ever-changing landscape, AI engineers often face the challenge of orchestrating complex sequences of tasks, managing data flows, and ensuring the smooth execution of AI workflows. This is where the Workflow Class, such as the SequentialWorkflow class we discussed earlier, plays a pivotal role in enabling AI engineers to achieve their goals efficiently and effectively. + +## The Versatile World of AI Workflows +AI workflows encompass a wide range of tasks and processes, from data preprocessing and model training to natural language understanding and decision-making. These workflows are the backbone of AI systems, guiding them through intricate sequences of actions to deliver meaningful results. Here are some of the diverse use cases where the Workflow Class can empower AI engineers: + +### 1. Natural Language Processing (NLP) Pipelines +AI engineers often build NLP pipelines that involve multiple stages such as text preprocessing, tokenization, feature extraction, model inference, and post-processing. The Workflow Class enables the orderly execution of these stages, ensuring that textual data flows seamlessly through each step, resulting in accurate and coherent NLP outcomes. + +### 2. Data Ingestion and Transformation +AI projects frequently require the ingestion of diverse data sources, including structured databases, unstructured text, and multimedia content. The Workflow Class can be used to design data ingestion workflows that extract, transform, and load (ETL) data efficiently, making it ready for downstream AI tasks like training and analysis. + +### 3. Autonomous Agents and Robotics +In autonomous robotics and intelligent agent systems, workflows are essential for decision-making, sensor fusion, motion planning, and control. AI engineers can use the Workflow Class to create structured sequences of actions that guide robots and agents through dynamic environments, enabling them to make informed decisions and accomplish tasks autonomously. + +### 4. Machine Learning Model Training +Training machine learning models involves a series of steps, including data preprocessing, feature engineering, model selection, hyperparameter tuning, and evaluation. The Workflow Class simplifies the orchestration of these steps, allowing AI engineers to experiment with different configurations and track the progress of model training. + +### 5. 
Content Generation and Summarization +AI-driven content generation tasks, such as generating articles, reports, or summaries, often require multiple steps, including content creation and post-processing. The Workflow Class can be used to create content generation workflows, ensuring that the generated content meets quality and coherence criteria. + +### 6. Adaptive Decision-Making +In AI systems that make real-time decisions based on changing data and environments, workflows facilitate adaptive decision-making. Engineers can use the Workflow Class to design decision-making pipelines that take into account the latest information and make informed choices. + +## Enabling Efficiency and Maintainability +The Workflow Class provides AI engineers with a structured and maintainable approach to building, executing, and managing complex AI workflows. It offers the following advantages: + +- Modularity: Workflows can be modularly designed, allowing engineers to focus on individual task implementations and ensuring code reusability. + +- Debugging and Testing: The Workflow Class simplifies debugging and testing by providing a clear sequence of tasks and well-defined inputs and outputs for each task. + +- Scalability: As AI projects grow in complexity, the Workflow Class can help manage and scale workflows by adding or modifying tasks as needed. + +- Error Handling: The class supports error handling strategies, enabling engineers to define how to handle unexpected failures gracefully. + +- Maintainability: With structured workflows, AI engineers can easily maintain and update AI systems as requirements evolve or new data sources become available. + +The Workflow Class, such as the SequentialWorkflow class, is an indispensable tool in the toolkit of AI engineers. It empowers engineers to design, execute, and manage AI workflows across a diverse range of use cases. By providing structure, modularity, and maintainability to AI projects, the Workflow Class contributes significantly to the efficiency and success of AI engineering endeavors. As the field of AI continues to advance, harnessing the power of workflow orchestration will remain a key ingredient in building intelligent and adaptable systems, now let’s get started with SequentialWorkflow. + +## Official Swarms Links +Here is the Swarms website: + +Here is the Swarms Github: + +Here are the Swarms docs: + +And, join the Swarm community! + +Book a call with The Swarm Corporation here if you’re interested in high performance custom swarms! + +Now let’s begin… + +## Installation +Before we dive into the tutorial, make sure you have the following prerequisites in place: + +Python installed on your system. +The swarms library installed. You can install it via pip using the following command: + +`pip3 install --upgrade swarms` + +Additionally, you will need an API key for the OpenAIChat model to run the provided code examples. Replace "YOUR_API_KEY" with your actual API key in the code examples where applicable. + +## Getting Started +Let’s start by importing the necessary modules and initializing the OpenAIChat model, which we will use in our workflow tasks. 
```python
from swarms.models import OpenAIChat
from swarms.structs import Flow
from swarms.structs.sequential_workflow import SequentialWorkflow

# Replace "YOUR_API_KEY" with your actual OpenAI API key
api_key = "YOUR_API_KEY"

# Initialize the language model flow (e.g., GPT-3)
llm = OpenAIChat(
    openai_api_key=api_key,
    temperature=0.5,
    max_tokens=3000,
)
```

We have initialized the OpenAIChat model, which will be used as a callable object in our tasks. Now, let’s proceed to create the SequentialWorkflow.

### Creating a SequentialWorkflow
To create a SequentialWorkflow, follow these steps:

```python
# Initialize Flows for individual tasks
flow1 = Flow(llm=llm, max_loops=1, dashboard=False)
flow2 = Flow(llm=llm, max_loops=1, dashboard=False)

# Create the Sequential Workflow
workflow = SequentialWorkflow(max_loops=1)
```

In this code snippet, we have initialized two Flow instances (flow1 and flow2) representing individual tasks within our workflow. These flows will use the OpenAIChat model we initialized earlier. We then create a SequentialWorkflow instance named workflow with a maximum loop count of 1. The max_loops parameter determines how many times the entire workflow can be run, and we set it to 1 for this example.

### Adding Tasks to the SequentialWorkflow
Now that we have created the SequentialWorkflow, let’s add tasks to it. In our example, we’ll create two tasks: one for generating a 10,000-word blog on “health and wellness” and another for summarizing the generated blog.

```python
# Add tasks to the workflow
workflow.add("Generate a 10,000 word blog on health and wellness.", flow1)
workflow.add("Summarize the generated blog", flow2)
```

The workflow.add() method is used to add tasks to the workflow. Each task is described using a human-readable description, such as "Generate a 10,000 word blog on health and wellness," and is associated with a flow (callable object) that will be executed as the task. In our example, flow1 and flow2 represent the tasks.

### Running the SequentialWorkflow
With tasks added to the SequentialWorkflow, we can now run the workflow sequentially using the workflow.run() method.

```python
# Run the workflow
workflow.run()
```

Executing workflow.run() will start the execution of tasks in the order they were added to the workflow. In our example, it will first generate the blog and then summarize it.

### Accessing Task Results
After running the workflow, you can access the results of each task using the get_task_results() method.

```python
# Get and display the results of each task in the workflow
results = workflow.get_task_results()
for task_description, result in results.items():
    print(f"Task: {task_description}, Result: {result}")
```

The workflow.get_task_results() method returns a dictionary where the keys are task descriptions, and the values are the corresponding results. You can then iterate through the results and print them, as shown in the code snippet.

### Resetting a SequentialWorkflow
Sometimes, you might need to reset a SequentialWorkflow to start fresh. You can use the workflow.reset_workflow() method for this purpose.

```python
# Reset the workflow
workflow.reset_workflow()
```

Resetting the workflow clears the results of each task, allowing you to rerun the workflow from the beginning without reinitializing it.

### Updating Task Arguments
You can also update the arguments of a specific task in the workflow using the workflow.update_task() method.
+ +### Update the arguments of a specific task in the workflow +`workflow.update_task("Generate a 10,000 word blog on health and wellness.", max_loops=2)` + +In this example, we update the max_loops argument of the task with the description "Generate a 10,000 word blog on health and wellness" to 2. This can be useful if you want to change the behavior of a specific task without recreating the entire workflow. + +# Conclusion: Mastering Workflow Orchestration in AI Engineering +In the ever-evolving landscape of artificial intelligence (AI), where the pace of innovation and complexity of tasks are ever-increasing, harnessing the power of workflow orchestration is paramount. In this comprehensive walkthrough guide, we’ve embarked on a journey through the world of workflow orchestration, focusing on the Workflow Class, with a specific emphasis on the SequentialWorkflow class. As we conclude this exploration, we’ve delved deep into the intricacies of orchestrating AI workflows, and it’s time to reflect on the valuable insights gained and the immense potential that this knowledge unlocks for AI engineers. + +## The Art of Workflow Orchestration +At its core, workflow orchestration is the art of designing, managing, and executing sequences of tasks or processes in a structured and efficient manner. In the realm of AI engineering, where tasks can range from data preprocessing and model training to decision-making and autonomous actions, mastering workflow orchestration is a game-changer. It empowers AI engineers to streamline their work, ensure reliable execution, and deliver impactful results. + +The Workflow Class, and particularly the SequentialWorkflow class we’ve explored, acts as a guiding light in this intricate journey. It provides AI engineers with a toolbox of tools and techniques to conquer the challenges of orchestrating AI workflows effectively. Through a disciplined approach and adherence to best practices, AI engineers can achieve the following: + +### 1. Structured Workflow Design +A well-structured workflow is the cornerstone of any successful AI project. The Workflow Class encourages AI engineers to break down complex tasks into manageable units. Each task becomes a building block that contributes to the overarching goal. Whether it’s preprocessing data, training a machine learning model, or generating content, structured workflow design ensures clarity, modularity, and maintainability. + +### 2. Efficient Task Sequencing +In AI, the order of tasks often matters. One task’s output can be another task’s input, and ensuring the correct sequence of execution is crucial. The SequentialWorkflow class enforces this sequential execution, eliminating the risk of running tasks out of order. It ensures that the workflow progresses systematically, following the predefined sequence of tasks. + +### 3. Error Resilience and Recovery +AI systems must be resilient in the face of unexpected errors and failures. The Workflow Class equips AI engineers with error handling strategies, such as retries and fallbacks. These strategies provide the ability to gracefully handle issues, recover from failures, and continue the workflow’s execution without disruption. + +### 4. Code Modularity and Reusability +Building AI workflows often involves implementing various tasks, each with its own logic. The Workflow Class encourages code modularity, allowing AI engineers to encapsulate tasks as separate units. 
This modularity promotes code reusability, making it easier to adapt and expand workflows as AI projects evolve. + +### 5. Efficient Debugging and Testing +Debugging and testing AI workflows can be challenging without clear structure and boundaries. The Workflow Class provides a clear sequence of tasks with well-defined inputs and outputs. This structure simplifies the debugging process, as AI engineers can isolate and test individual tasks, ensuring that each component functions as intended. + +### 6. Scalability and Adaptability +As AI projects grow in complexity, the Workflow Class scales effortlessly. AI engineers can add or modify tasks as needed, accommodating new data sources, algorithms, or requirements. This scalability ensures that workflows remain adaptable to changing demands and evolving AI landscapes. + +### 7. Maintainability and Future-Proofing +Maintaining AI systems over time is a crucial aspect of engineering. The Workflow Class fosters maintainability by providing a clear roadmap of tasks and their interactions. AI engineers can revisit, update, and extend workflows with confidence, ensuring that AI systems remain effective and relevant in the long run. + +## Empowering AI Engineers +The knowledge and skills gained from this walkthrough guide go beyond technical proficiency. They empower AI engineers to be architects of intelligent systems, capable of orchestrating AI workflows that solve real-world problems. The Workflow Class is a versatile instrument in their hands, enabling them to tackle diverse use cases and engineering challenges. + +## Diverse Use Cases for Workflow Class +Throughout this guide, we explored a myriad of use cases where the Workflow Class shines: + +Natural Language Processing (NLP) Pipelines: In NLP, workflows involve multiple stages, and the Workflow Class ensures orderly execution, resulting in coherent NLP outcomes. + +Data Ingestion and Transformation: Data is the lifeblood of AI, and structured data workflows ensure efficient data preparation for downstream tasks. + +Autonomous Agents and Robotics: For robots and intelligent agents, workflows enable autonomous decision-making and task execution. + +Machine Learning Model Training: Model training workflows encompass numerous steps, and structured orchestration simplifies the process. + +Content Generation and Summarization: Workflows for content generation ensure that generated content meets quality and coherence criteria. + +Adaptive Decision-Making: In dynamic environments, workflows facilitate adaptive decision-making based on real-time data. + +## Efficiency and Maintainability +AI engineers not only have the tools to tackle these use cases but also the means to do so efficiently. The Workflow Class fosters efficiency and maintainability, making AI engineering endeavors more manageable: + +- Modularity: Encapsulate tasks as separate units, promoting code reusability and maintainability. + +- Debugging and Testing: Streamline debugging and testing through clear task boundaries and well-defined inputs and outputs. + +- Scalability: As AI projects grow, workflows scale with ease, accommodating new components and requirements. +Error Handling: Gracefully handle errors and failures, ensuring that AI systems continue to operate smoothly. + +- Maintainability: AI systems remain adaptable and maintainable, even as the AI landscape evolves and requirements change. 
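
To make the error-handling point concrete, here is a minimal sketch of one way to wrap `workflow.run()` in a retry loop. It reuses the `workflow` object built earlier in this guide; the `run_with_retries` helper, its parameters, and the backoff policy are illustrative assumptions rather than part of the swarms API.

```python
import time

def run_with_retries(workflow, max_attempts=3, backoff_seconds=2):
    """Run a SequentialWorkflow, retrying on failure with a simple linear backoff."""
    for attempt in range(1, max_attempts + 1):
        try:
            workflow.run()
            return workflow.get_task_results()
        except Exception as error:
            print(f"Attempt {attempt} failed: {error}")
            if attempt == max_attempts:
                raise  # give up after the final attempt
            time.sleep(backoff_seconds * attempt)

results = run_with_retries(workflow)
```

Keeping the retry policy outside the workflow keeps the tasks themselves simple and makes the failure behavior easy to adjust per deployment.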
+ +## The Future of AI Engineering +As AI engineering continues to advance, workflow orchestration will play an increasingly pivotal role. The Workflow Class is not a static tool; it is a dynamic enabler of innovation. In the future, we can expect further enhancements and features to meet the evolving demands of AI engineering: + +### 1. Asynchronous Support +Support for asynchronous task execution will improve the efficiency of workflows, especially when tasks involve waiting for external events or resources. + +### 2. Context Managers +Introducing context manager support for tasks can simplify resource management, such as opening and closing files or database connections. + +### 3. Workflow History +Maintaining a detailed history of workflow execution, including timestamps, task durations, and input/output data, will facilitate debugging and performance analysis. + +### 4. Parallel Processing +Enhancing the module to support parallel processing with a pool of workers can significantly speed up the execution of tasks, especially for computationally intensive workflows. + +### 5. Error Handling Strategies +Providing built-in error handling strategies, such as retries, fallbacks, and circuit breakers, will further enhance the resilience of workflows. + +## Closing Thoughts +In conclusion, the journey through workflow orchestration in AI engineering has been both enlightening and empowering. The Workflow Class, and particularly the SequentialWorkflow class, has proven to be an invaluable ally in the AI engineer’s toolkit. It offers structure, modularity, and efficiency, ensuring that AI projects progress smoothly from inception to deployment. + +As AI continues to permeate every aspect of our lives, the skills acquired in this guide will remain highly relevant and sought after. AI engineers armed with workflow orchestration expertise will continue to push the boundaries of what is possible, solving complex problems, and driving innovation. + +But beyond the technical aspects, this guide also emphasizes the importance of creativity, adaptability, and problem-solving. AI engineering is not just about mastering tools; it’s about using them to make a meaningful impact on the world. + +So, whether you’re just starting your journey into AI engineering or you’re a seasoned professional seeking to expand your horizons, remember that the power of workflow orchestration lies not only in the code but in the limitless potential it unlocks for you as an AI engineer. As you embark on your own AI adventures, may this guide serve as a reliable companion, illuminating your path and inspiring your journey towards AI excellence. + +The world of AI is waiting for your innovation and creativity. With workflow orchestration as your guide, you have the tools to shape the future. The possibilities are boundless, and the future is yours to create. + +Official Swarms Links +Here is the Swarms website: + +Here is the Swarms Github: + +Here are the Swarms docs: + +And, join the Swarm community! + +Book a call with The Swarm Corporation here if you’re interested in high performance custom swarms! 
\ No newline at end of file diff --git a/docs/prompt.txt b/docs/prompt.txt deleted file mode 100644 index 3644be4a..00000000 --- a/docs/prompt.txt +++ /dev/null @@ -1,93 +0,0 @@ -Create multi-page long and explicit professional pytorch-like documentation for the swarms code below follow the outline for the swarms library, provide many examples and teach the user about the code, provide examples for every function, make the documentation 10,000 words, provide many usage examples and note this is markdown docs, create the documentation for the code to document. - -Now make the professional documentation for this code, provide the architecture and how the class works and why it works that way, it's purpose, provide args, their types, 3 ways of usage examples, in examples use from shapeless import x - -BE VERY EXPLICIT AND THOROUGH, MAKE IT DEEP AND USEFUL - -######## -Step 1: Understand the purpose and functionality of the module or framework - -Read and analyze the description provided in the documentation to understand the purpose and functionality of the module or framework. -Identify the key features, parameters, and operations performed by the module or framework. -Step 2: Provide an overview and introduction - -Start the documentation by providing a brief overview and introduction to the module or framework. -Explain the importance and relevance of the module or framework in the context of the problem it solves. -Highlight any key concepts or terminology that will be used throughout the documentation. -Step 3: Provide a class or function definition - -Provide the class or function definition for the module or framework. -Include the parameters that need to be passed to the class or function and provide a brief description of each parameter. -Specify the data types and default values for each parameter. -Step 4: Explain the functionality and usage - -Provide a detailed explanation of how the module or framework works and what it does. -Describe the steps involved in using the module or framework, including any specific requirements or considerations. -Provide code examples to demonstrate the usage of the module or framework. -Explain the expected inputs and outputs for each operation or function. -Step 5: Provide additional information and tips - -Provide any additional information or tips that may be useful for using the module or framework effectively. -Address any common issues or challenges that developers may encounter and provide recommendations or workarounds. -Step 6: Include references and resources - -Include references to any external resources or research papers that provide further information or background on the module or framework. -Provide links to relevant documentation or websites for further exploration. -Example Template for the given documentation: - -# Module/Function Name: MultiheadAttention - -class torch.nn.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None): - """ - Creates a multi-head attention module for joint information representation from the different subspaces. - - Parameters: - - embed_dim (int): Total dimension of the model. - - num_heads (int): Number of parallel attention heads. The embed_dim will be split across num_heads. - - dropout (float): Dropout probability on attn_output_weights. Default: 0.0 (no dropout). - - bias (bool): If specified, adds bias to input/output projection layers. Default: True. 
- - add_bias_kv (bool): If specified, adds bias to the key and value sequences at dim=0. Default: False. - - add_zero_attn (bool): If specified, adds a new batch of zeros to the key and value sequences at dim=1. Default: False. - - kdim (int): Total number of features for keys. Default: None (uses kdim=embed_dim). - - vdim (int): Total number of features for values. Default: None (uses vdim=embed_dim). - - batch_first (bool): If True, the input and output tensors are provided as (batch, seq, feature). Default: False. - - device (torch.device): If specified, the tensors will be moved to the specified device. - - dtype (torch.dtype): If specified, the tensors will have the specified dtype. - """ - - def forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False): - """ - Forward pass of the multi-head attention module. - - Parameters: - - query (Tensor): Query embeddings of shape (L, E_q) for unbatched input, (L, N, E_q) when batch_first=False, or (N, L, E_q) when batch_first=True. - - key (Tensor): Key embeddings of shape (S, E_k) for unbatched input, (S, N, E_k) when batch_first=False, or (N, S, E_k) when batch_first=True. - - value (Tensor): Value embeddings of shape (S, E_v) for unbatched input, (S, N, E_v) when batch_first=False, or (N, S, E_v) when batch_first=True. - - key_padding_mask (Optional[Tensor]): If specified, a mask indicating elements to be ignored in key for attention computation. - - need_weights (bool): If specified, returns attention weights in addition to attention outputs. Default: True. - - attn_mask (Optional[Tensor]): If specified, a mask preventing attention to certain positions. - - average_attn_weights (bool): If true, returns averaged attention weights per head. Otherwise, returns attention weights separately per head. Note that this flag only has an effect when need_weights=True. Default: True. - - is_causal (bool): If specified, applies a causal mask as the attention mask. Default: False. - - Returns: - Tuple[Tensor, Optional[Tensor]]: - - attn_output (Tensor): Attention outputs of shape (L, E) for unbatched input, (L, N, E) when batch_first=False, or (N, L, E) when batch_first=True. - - attn_output_weights (Optional[Tensor]): Attention weights of shape (L, S) when unbatched or (N, L, S) when batched. Optional, only returned when need_weights=True. - """ - - # Implementation of the forward pass of the attention module goes here - - return attn_output, attn_output_weights - - -# Usage example: - -multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) -attn_output, attn_output_weights = multihead_attn(query, key, value) -Note: - -The above template includes the class or function definition, parameters, description, and usage example. -To replicate the documentation for any other module or framework, follow the same structure and provide the specific details for that module or framework. - - -############# CODE TO DOCUMENT, DOCUMENT THE diff --git a/docs/swarms/chunkers/basechunker.md b/docs/swarms/chunkers/basechunker.md index fed03277..33b03312 100644 --- a/docs/swarms/chunkers/basechunker.md +++ b/docs/swarms/chunkers/basechunker.md @@ -53,7 +53,7 @@ The `BaseChunker` class is the core component of the `BaseChunker` module. It is #### Parameters: - `separators` (list[ChunkSeparator]): Specifies a list of `ChunkSeparator` objects used to split the text into chunks. -- `tokenizer` (OpenAiTokenizer): Defines the tokenizer to be used for counting tokens in the text. 
+- `tokenizer` (OpenAITokenizer): Defines the tokenizer to be used for counting tokens in the text. - `max_tokens` (int): Sets the maximum token limit for each chunk. ### 4.2. Examples diff --git a/docs/swarms/chunkers/pdf_chunker.md b/docs/swarms/chunkers/pdf_chunker.md index 5b97a551..8c92060d 100644 --- a/docs/swarms/chunkers/pdf_chunker.md +++ b/docs/swarms/chunkers/pdf_chunker.md @@ -52,7 +52,7 @@ The `PdfChunker` class is the core component of the `PdfChunker` module. It is u #### Parameters: - `separators` (list[ChunkSeparator]): Specifies a list of `ChunkSeparator` objects used to split the PDF text content into chunks. -- `tokenizer` (OpenAiTokenizer): Defines the tokenizer used for counting tokens in the text. +- `tokenizer` (OpenAITokenizer): Defines the tokenizer used for counting tokens in the text. - `max_tokens` (int): Sets the maximum token limit for each chunk. ### 4.2. Examples diff --git a/docs/swarms/models/anthropic.md b/docs/swarms/models/anthropic.md index 4d5f1fcd..85e7a428 100644 --- a/docs/swarms/models/anthropic.md +++ b/docs/swarms/models/anthropic.md @@ -70,17 +70,18 @@ class Anthropic: ```python # Import necessary modules and classes from swarms.models import Anthropic -import torch # Initialize an instance of the Anthropic class -anthropic_instance = Anthropic() +model = Anthropic( + anthropic_api_key="" +) -# Using the generate method -completion_1 = anthropic_instance.generate("What is the capital of France?") +# Using the run method +completion_1 = model.run("What is the capital of France?") print(completion_1) # Using the __call__ method -completion_2 = anthropic_instance("How far is the moon from the earth?", stop=["miles", "km"]) +completion_2 = model("How far is the moon from the earth?", stop=["miles", "km"]) print(completion_2) ``` diff --git a/docs/swarms/models/dalle3.md b/docs/swarms/models/dalle3.md new file mode 100644 index 00000000..346489c7 --- /dev/null +++ b/docs/swarms/models/dalle3.md @@ -0,0 +1,261 @@ +# `Dalle3` Documentation + +## Table of Contents + +1. [Introduction](#introduction) +2. [Installation](#installation) +3. [Quick Start](#quick-start) +4. [Dalle3 Class](#dalle3-class) + - [Attributes](#attributes) + - [Methods](#methods) +5. [Usage Examples](#usage-examples) +6. [Error Handling](#error-handling) +7. [Advanced Usage](#advanced-usage) +8. [References](#references) + +--- + +## Introduction + +The Dalle3 library is a Python module that provides an easy-to-use interface for generating images from text descriptions using the DALL·E 3 model by OpenAI. DALL·E 3 is a powerful language model capable of converting textual prompts into images. This documentation will guide you through the installation, setup, and usage of the Dalle3 library. + +--- + +## Installation + +To use the Dalle3 model, you must first install swarms: + +```bash +pip install swarms +``` + +--- + +## Quick Start + +Let's get started with a quick example of using the Dalle3 library to generate an image from a text prompt: + +```python +from swarms.models.dalle3 import Dalle3 + +# Create an instance of the Dalle3 class +dalle = Dalle3() + +# Define a text prompt +task = "A painting of a dog" + +# Generate an image from the text prompt +image_url = dalle3(task) + +# Print the generated image URL +print(image_url) +``` + +This example demonstrates the basic usage of the Dalle3 library to convert a text prompt into an image. The generated image URL will be printed to the console. 
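
The Quick Start does not show where the OpenAI credentials come from. A common pattern, and an assumption here rather than something the class documents below, is that the underlying OpenAI client reads the `OPENAI_API_KEY` environment variable, so you can set it before constructing the class:

```python
import os

from swarms.models.dalle3 import Dalle3

# Assumption: the underlying OpenAI client picks up OPENAI_API_KEY from the environment.
os.environ["OPENAI_API_KEY"] = "sk-..."  # replace with your real key, or export it in your shell

dalle3 = Dalle3()
print(dalle3("A painting of a dog"))
```

If your installation wires credentials differently, adjust this step accordingly.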
+
+---
+
+## Dalle3 Class
+
+The Dalle3 library provides a `Dalle3` class that allows you to interact with the DALL·E 3 model. This class has several attributes and methods for generating images from text prompts.
+
+### Attributes
+
+- `model` (str): The name of the DALL·E 3 model. Default: "dall-e-3".
+- `img` (str): The image URL generated by the Dalle3 API.
+- `size` (str): The size of the generated image. Default: "1024x1024".
+- `max_retries` (int): The maximum number of API request retries. Default: 3.
+- `quality` (str): The quality of the generated image. Default: "standard".
+- `n` (int): The number of variations to create. Default: 4.
+
+### Methods
+
+#### `__call__(self, task: str) -> Dalle3`
+
+This method makes a call to the Dalle3 API and returns the image URL generated from the provided text prompt.
+
+Parameters:
+- `task` (str): The text prompt to be converted to an image.
+
+Returns:
+- `Dalle3`: An instance of the Dalle3 class with the image URL generated by the Dalle3 API.
+
+#### `create_variations(self, img: str)`
+
+This method creates variations of an image using the Dalle3 API.
+
+Parameters:
+- `img` (str): The image to be used for the API request.
+
+Returns:
+- `img` (str): The image URL of the generated variations.
+
+---
+
+## Usage Examples
+
+### Example 1: Basic Image Generation
+
+```python
+from swarms.models.dalle3 import Dalle3
+
+# Create an instance of the Dalle3 class
+dalle3 = Dalle3()
+
+# Define a text prompt
+task = "A painting of a dog"
+
+# Generate an image from the text prompt
+image_url = dalle3(task)
+
+# Print the generated image URL
+print(image_url)
+```
+
+### Example 2: Creating Image Variations
+
+```python
+from swarms.models.dalle3 import Dalle3
+
+# Create an instance of the Dalle3 class
+dalle3 = Dalle3()
+
+# Define the URL of an existing image
+img_url = "https://images.unsplash.com/photo-1694734479898-6ac4633158ac?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
+
+# Create variations of the image
+variations_url = dalle3.create_variations(img_url)
+
+# Print the URLs of the generated variations
+print(variations_url)
+```
+
+The following examples cover additional options and methods of the `Dalle3` class:
+
+### Example 3: Customizing Image Size
+
+You can customize the size of the generated image by specifying the `size` parameter when creating an instance of the `Dalle3` class. Here's how to generate a smaller image:
+
+```python
+from swarms.models.dalle3 import Dalle3
+
+# Create an instance of the Dalle3 class with a custom image size
+dalle3 = Dalle3(size="512x512")
+
+# Define a text prompt
+task = "A small painting of a cat"
+
+# Generate a smaller image from the text prompt
+image_url = dalle3(task)
+
+# Print the generated image URL
+print(image_url)
+```
+
+### Example 4: Adjusting Retry Limit
+
+You can adjust the maximum number of API request retries using the `max_retries` parameter. Here's how to increase the retry limit:
+
+```python
+from swarms.models.dalle3 import Dalle3
+
+# Create an instance of the Dalle3 class with a higher retry limit
+dalle3 = Dalle3(max_retries=5)
+
+# Define a text prompt
+task = "An image of a landscape"
+
+# Generate an image with a higher retry limit
+image_url = dalle3(task)
+
+# Print the generated image URL
+print(image_url)
+```
+
+### Example 5: Generating Image Variations
+
+To create variations of an existing image, you can use the `create_variations` method.
Here's an example: + +```python +from swarms.models.dalle3 import Dalle3 + +# Create an instance of the Dalle3 class +dalle3 = Dalle3() + +# Define the URL of an existing image +img_url = "https://images.unsplash.com/photo-1677290043066-12eccd944004?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + +# Create variations of the image +variations_url = dalle3.create_variations(img_url) + +# Print the URLs of the generated variations +print(variations_url) +``` + +### Example 6: Handling API Errors + +The Dalle3 library provides error handling for API-related issues. Here's how to handle and display API errors: + +```python +from swarms.models.dalle3 import Dalle3 + +# Create an instance of the Dalle3 class +dalle3 = Dalle3() + +# Define a text prompt +task = "Invalid prompt that may cause an API error" + +try: + # Attempt to generate an image with an invalid prompt + image_url = dalle3(task) + print(image_url) +except Exception as e: + print(f"Error occurred: {str(e)}") +``` + +### Example 7: Customizing Image Quality + +You can customize the quality of the generated image by specifying the `quality` parameter. Here's how to generate a high-quality image: + +```python +from swarms.models.dalle3 import Dalle3 + +# Create an instance of the Dalle3 class with high quality +dalle3 = Dalle3(quality="high") + +# Define a text prompt +task = "A high-quality image of a sunset" + +# Generate a high-quality image from the text prompt +image_url = dalle3(task) + +# Print the generated image URL +print(image_url) +``` + + +--- + +## Error Handling + +The Dalle3 library provides error handling for API-related issues. If an error occurs during API communication, the library will handle it and provide detailed error messages. Make sure to handle exceptions appropriately in your code. + +--- + +## Advanced Usage + +For advanced usage and customization of the Dalle3 library, you can explore the attributes and methods of the `Dalle3` class. Adjusting parameters such as `size`, `max_retries`, and `quality` allows you to fine-tune the image generation process to your specific needs. + +--- + +## References + +For more information about the DALL·E 3 model and the Dalle3 library, you can refer to the official OpenAI documentation and resources. + +- [OpenAI API Documentation](https://beta.openai.com/docs/) +- [DALL·E 3 Model Information](https://openai.com/research/dall-e-3) +- [Dalle3 GitHub Repository](https://github.com/openai/dall-e-3) + +--- + +This concludes the documentation for the Dalle3 library. You can now use the library to generate images from text prompts and explore its advanced features for various applications. \ No newline at end of file diff --git a/docs/swarms/models/distilled_whisperx.md b/docs/swarms/models/distilled_whisperx.md new file mode 100644 index 00000000..e9339c1e --- /dev/null +++ b/docs/swarms/models/distilled_whisperx.md @@ -0,0 +1,123 @@ +# DistilWhisperModel Documentation + +## Overview + +The `DistilWhisperModel` is a Python class designed to handle English speech recognition tasks. It leverages the capabilities of the Whisper model, which is fine-tuned for speech-to-text processes. It is designed for both synchronous and asynchronous transcription of audio inputs, offering flexibility for real-time applications or batch processing. 
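+
+At a glance, typical usage looks like the sketch below; installation and each method are covered in the sections that follow, and the audio path here is only illustrative:
+
+```python
+import asyncio
+
+from swarms.models import DistilWhisperModel
+
+model_wrapper = DistilWhisperModel()
+
+# Synchronous transcription
+print(model_wrapper.transcribe("audio/sample.mp3"))
+
+# Asynchronous transcription of the same file
+print(asyncio.run(model_wrapper.async_transcribe("audio/sample.mp3")))
+```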
+ +## Installation + +Before you can use `DistilWhisperModel`, ensure you have the required libraries installed: + +```sh +pip3 install --upgrade swarms +``` + +## Initialization + +The `DistilWhisperModel` class is initialized with the following parameters: + +| Parameter | Type | Description | Default | +|-----------|------|-------------|---------| +| `model_id` | `str` | The identifier for the pre-trained Whisper model | `"distil-whisper/distil-large-v2"` | + +Example of initialization: + +```python +from swarms.models import DistilWhisperModel + +# Initialize with default model +model_wrapper = DistilWhisperModel() + +# Initialize with a specific model ID +model_wrapper = DistilWhisperModel(model_id='distil-whisper/distil-large-v2') +``` + +## Attributes + +After initialization, the `DistilWhisperModel` has several attributes: + +| Attribute | Type | Description | +|-----------|------|-------------| +| `device` | `str` | The device used for computation (`"cuda:0"` for GPU or `"cpu"`). | +| `torch_dtype` | `torch.dtype` | The data type used for the Torch tensors. | +| `model_id` | `str` | The model identifier string. | +| `model` | `torch.nn.Module` | The actual Whisper model loaded from the identifier. | +| `processor` | `transformers.AutoProcessor` | The processor for handling input data. | + +## Methods + +### `transcribe` + +Transcribes audio input synchronously. + +**Arguments**: + +| Argument | Type | Description | +|----------|------|-------------| +| `inputs` | `Union[str, dict]` | File path or audio data dictionary. | + +**Returns**: `str` - The transcribed text. + +**Usage Example**: + +```python +# Synchronous transcription +transcription = model_wrapper.transcribe('path/to/audio.mp3') +print(transcription) +``` + +### `async_transcribe` + +Transcribes audio input asynchronously. + +**Arguments**: + +| Argument | Type | Description | +|----------|------|-------------| +| `inputs` | `Union[str, dict]` | File path or audio data dictionary. | + +**Returns**: `Coroutine` - A coroutine that when awaited, returns the transcribed text. + +**Usage Example**: + +```python +import asyncio + +# Asynchronous transcription +transcription = asyncio.run(model_wrapper.async_transcribe('path/to/audio.mp3')) +print(transcription) +``` + +### `real_time_transcribe` + +Simulates real-time transcription of an audio file. + +**Arguments**: + +| Argument | Type | Description | +|----------|------|-------------| +| `audio_file_path` | `str` | Path to the audio file. | +| `chunk_duration` | `int` | Duration of audio chunks in seconds. | + +**Usage Example**: + +```python +# Real-time transcription simulation +model_wrapper.real_time_transcribe('path/to/audio.mp3', chunk_duration=5) +``` + +## Error Handling + +The `DistilWhisperModel` class incorporates error handling for file not found errors and generic exceptions during the transcription process. If a non-recoverable exception is raised, it is printed to the console in red to indicate failure. + +## Conclusion + +The `DistilWhisperModel` offers a convenient interface to the powerful Whisper model for speech recognition. Its design supports both batch and real-time transcription, catering to different application needs. The class's error handling and retry logic make it robust for real-world applications. + +## Additional Notes + +- Ensure you have appropriate permissions to read audio files when using file paths. +- Transcription quality depends on the audio quality and the Whisper model's performance on your dataset. 
+- Adjust `chunk_duration` according to the processing power of your system for real-time transcription. + +For a full list of models supported by `transformers.AutoModelForSpeechSeq2Seq`, visit the [Hugging Face Model Hub](https://huggingface.co/models). diff --git a/docs/swarms/models/fuyu.md b/docs/swarms/models/fuyu.md index e342e51e..021469e8 100644 --- a/docs/swarms/models/fuyu.md +++ b/docs/swarms/models/fuyu.md @@ -42,13 +42,6 @@ from swarms.models import Fuyu fuyu = Fuyu() ``` -### Example 1 - Initialization - -```python -from swarms.models import Fuyu - -fuyu = Fuyu() -``` 2. Generate Text with Fuyu: diff --git a/docs/swarms/models/gpt4v.md b/docs/swarms/models/gpt4v.md new file mode 100644 index 00000000..3fe3d81c --- /dev/null +++ b/docs/swarms/models/gpt4v.md @@ -0,0 +1,251 @@ +# `GPT4Vision` Documentation + +## Table of Contents +- [Overview](#overview) +- [Installation](#installation) +- [Initialization](#initialization) +- [Methods](#methods) + - [process_img](#process_img) + - [__call__](#__call__) + - [run](#run) + - [arun](#arun) +- [Configuration Options](#configuration-options) +- [Usage Examples](#usage-examples) +- [Additional Tips](#additional-tips) +- [References and Resources](#references-and-resources) + +--- + +## Overview + +The GPT4Vision Model API is designed to provide an easy-to-use interface for interacting with the OpenAI GPT-4 Vision model. This model can generate textual descriptions for images and answer questions related to visual content. Whether you want to describe images or perform other vision-related tasks, GPT4Vision makes it simple and efficient. + +The library offers a straightforward way to send images and tasks to the GPT-4 Vision model and retrieve the generated responses. It handles API communication, authentication, and retries, making it a powerful tool for developers working with computer vision and natural language processing tasks. + +## Installation + +To use the GPT4Vision Model API, you need to install the required dependencies and configure your environment. Follow these steps to get started: + +1. Install the required Python package: + + ```bash + pip3 install --upgrade swarms + ``` + +2. Make sure you have an OpenAI API key. You can obtain one by signing up on the [OpenAI platform](https://beta.openai.com/signup/). + +3. Set your OpenAI API key as an environment variable. You can do this in your code or your environment configuration. Alternatively, you can provide the API key directly when initializing the `GPT4Vision` class. + +## Initialization + +To start using the GPT4Vision Model API, you need to create an instance of the `GPT4Vision` class. You can customize its behavior by providing various configuration options, but it also comes with sensible defaults. + +Here's how you can initialize the `GPT4Vision` class: + +```python +from swarms.models.gpt4v import GPT4Vision + +gpt4vision = GPT4Vision( + api_key="Your Key" +) +``` + +The above code initializes the `GPT4Vision` class with default settings. You can adjust these settings as needed. + +## Methods + +### `process_img` + +The `process_img` method is used to preprocess an image before sending it to the GPT-4 Vision model. It takes the image path as input and returns the processed image in a format suitable for API requests. + +```python +processed_img = gpt4vision.process_img(img_path) +``` + +- `img_path` (str): The file path or URL of the image to be processed. + +### `__call__` + +The `__call__` method is the main method for interacting with the GPT-4 Vision model. 
It sends the image and tasks to the model and returns the generated response. + +```python +response = gpt4vision(img, tasks) +``` + +- `img` (Union[str, List[str]]): Either a single image URL or a list of image URLs to be used for the API request. +- `tasks` (List[str]): A list of tasks or questions related to the image(s). + +This method returns a `GPT4VisionResponse` object, which contains the generated answer. + +### `run` + +The `run` method is an alternative way to interact with the GPT-4 Vision model. It takes a single task and image URL as input and returns the generated response. + +```python +response = gpt4vision.run(task, img) +``` + +- `task` (str): The task or question related to the image. +- `img` (str): The image URL to be used for the API request. + +This method simplifies interactions when dealing with a single task and image. + +### `arun` + +The `arun` method is an asynchronous version of the `run` method. It allows for asynchronous processing of API requests, which can be useful in certain scenarios. + +```python +import asyncio + +async def main(): + response = await gpt4vision.arun(task, img) + print(response) + +loop = asyncio.get_event_loop() +loop.run_until_complete(main()) +``` + +- `task` (str): The task or question related to the image. +- `img` (str): The image URL to be used for the API request. + +## Configuration Options + +The `GPT4Vision` class provides several configuration options that allow you to customize its behavior: + +- `max_retries` (int): The maximum number of retries to make to the API. Default: 3 +- `backoff_factor` (float): The backoff factor to use for exponential backoff. Default: 2.0 +- `timeout_seconds` (int): The timeout in seconds for the API request. Default: 10 +- `api_key` (str): The API key to use for the API request. Default: None (set via environment variable) +- `quality` (str): The quality of the image to generate. Options: 'low' or 'high'. Default: 'low' +- `max_tokens` (int): The maximum number of tokens to use for the API request. Default: 200 + +## Usage Examples + +### Example 1: Generating Image Descriptions + +```python +gpt4vision = GPT4Vision() +img = "https://example.com/image.jpg" +tasks = ["Describe this image."] +response = gpt4vision(img, tasks) +print(response.answer) +``` + +In this example, we create an instance of `GPT4Vision`, provide an image URL, and ask the model to describe the image. The response contains the generated description. + +### Example 2: Custom Configuration + +```python +custom_config = { + "max_retries": 5, + "timeout_seconds": 20, + "quality": "high", + "max_tokens": 300, +} +gpt4vision = GPT4Vision(**custom_config) +img = "https://example.com/another_image.jpg" +tasks = ["What objects can you identify in this image?"] +response = gpt4vision(img, tasks) +print(response.answer) +``` + +In this example, we create an instance of `GPT4Vision` with custom configuration options. We set a higher timeout, request high-quality images, and allow more tokens in the response. + +### Example 3: Using the `run` Method + +```python +gpt4vision = GPT4Vision() +img = "https://example.com/image.jpg" +task = "Describe this image in detail." +response = gpt4vision.run(task, img) +print(response) +``` + +In this example, we use the `run` method to simplify the interaction by providing a single task and image URL. + +# Model Usage and Image Understanding + +The GPT-4 Vision model processes images in a unique way, allowing it to answer questions about both or each of the images independently. 
Here's an overview: + +| Purpose | Description | +| --------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | +| Image Understanding | The model is shown two copies of the same image and can answer questions about both or each of the images independently. | + +# Image Detail Control + +You have control over how the model processes the image and generates textual understanding by using the `detail` parameter, which has two options: `low` and `high`. + +| Detail | Description | +| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| low | Disables the "high-res" model. The model receives a low-res 512 x 512 version of the image and represents the image with a budget of 65 tokens. Ideal for use cases not requiring high detail. | +| high | Enables "high-res" mode. The model first sees the low-res image and then creates detailed crops of input images as 512px squares based on the input image size. Uses a total of 129 tokens. | + +# Managing Images + +To use the Chat Completions API effectively, you must manage the images you pass to the model. Here are some key considerations: + +| Management Aspect | Description | +| ------------------------- | ------------------------------------------------------------------------------------------------- | +| Image Reuse | To pass the same image multiple times, include the image with each API request. | +| Image Size Optimization | Improve latency by downsizing images to meet the expected size requirements. | +| Image Deletion | After processing, images are deleted from OpenAI servers and not retained. No data is used for training. | + +# Limitations + +While GPT-4 with Vision is powerful, it has some limitations: + +| Limitation | Description | +| -------------------------------------------- | --------------------------------------------------------------------------------------------------- | +| Medical Images | Not suitable for interpreting specialized medical images like CT scans. | +| Non-English Text | May not perform optimally when handling non-Latin alphabets, such as Japanese or Korean. | +| Large Text in Images | Enlarge text within images for readability, but avoid cropping important details. | +| Rotated or Upside-Down Text/Images | May misinterpret rotated or upside-down text or images. | +| Complex Visual Elements | May struggle to understand complex graphs or text with varying colors or styles. | +| Spatial Reasoning | Struggles with tasks requiring precise spatial localization, such as identifying chess positions. | +| Accuracy | May generate incorrect descriptions or captions in certain scenarios. | +| Panoramic and Fisheye Images | Struggles with panoramic and fisheye images. | + +# Calculating Costs + +Image inputs are metered and charged in tokens. The token cost depends on the image size and detail option. 
+ +| Example | Token Cost | +| --------------------------------------------- | ----------- | +| 1024 x 1024 square image in detail: high mode | 765 tokens | +| 2048 x 4096 image in detail: high mode | 1105 tokens | +| 4096 x 8192 image in detail: low mode | 85 tokens | + +# FAQ + +Here are some frequently asked questions about GPT-4 with Vision: + +| Question | Answer | +| -------------------------------------------- | -------------------------------------------------------------------------------------------------- | +| Fine-Tuning Image Capabilities | No, fine-tuning the image capabilities of GPT-4 is not supported at this time. | +| Generating Images | GPT-4 is used for understanding images, not generating them. | +| Supported Image File Types | Supported image file types include PNG (.png), JPEG (.jpeg and .jpg), WEBP (.webp), and non-animated GIF (.gif). | +| Image Size Limitations | Image uploads are restricted to 20MB per image. | +| Image Deletion | Uploaded images are automatically deleted after processing by the model. | +| Learning More | For more details about GPT-4 with Vision, refer to the GPT-4 with Vision system card. | +| CAPTCHA Submission | CAPTCHAs are blocked for safety reasons. | +| Rate Limits | Image processing counts toward your tokens per minute (TPM) limit. Refer to the calculating costs section for details. | +| Image Metadata | The model does not receive image metadata. | +| Handling Unclear Images | If an image is unclear, the model will do its best to interpret it, but results may be less accurate. | + + + +## Additional Tips + +- Make sure to handle potential exceptions and errors when making API requests. The library includes retries and error handling, but it's essential to handle exceptions gracefully in your code. +- Experiment with different configuration options to optimize the trade-off between response quality and response time based on your specific requirements. + +## References and Resources + +- [OpenAI Platform](https://beta.openai.com/signup/): Sign up for an OpenAI API key. +- [OpenAI API Documentation](https://platform.openai.com/docs/api-reference/chat/create): Official API documentation for the GPT-4 Vision model. + +Now you have a comprehensive understanding of the GPT4Vision Model API, its configuration options, and how to use it for various computer vision and natural language processing tasks. Start experimenting and integrating it into your projects to leverage the power of GPT-4 Vision for image-related tasks. + +# Conclusion + +With GPT-4 Vision, you have a powerful tool for understanding and generating textual descriptions for images. By considering its capabilities, limitations, and cost calculations, you can effectively leverage this model for various image-related tasks. \ No newline at end of file diff --git a/docs/swarms/models/mistral.md b/docs/swarms/models/mistral.md index 19b7b43a..c8dc179c 100644 --- a/docs/swarms/models/mistral.md +++ b/docs/swarms/models/mistral.md @@ -1,4 +1,4 @@ -# Swarms Documentation +# `Mistral` Documentation ## Table of Contents @@ -133,9 +133,7 @@ Mistral provides two methods for running the model: The `run` method is used to generate text-based responses to a given task or input. It takes a single string parameter, `task`, and returns the generated text as a string. ```python -def run - -(self, task: str) -> str: +def run(self, task: str) -> str: """ Run the model on a given task. 
@@ -236,6 +234,8 @@ In this section, we provide practical examples to illustrate how to use Mistral In this example, we initialize the Mistral AI agent with custom settings: ```python +from swarms.models import Mistral + model = Mistral( ai_name="My AI Assistant", device="cpu", diff --git a/docs/swarms/structs/flow.md b/docs/swarms/structs/flow.md index 9300c632..13f0541c 100644 --- a/docs/swarms/structs/flow.md +++ b/docs/swarms/structs/flow.md @@ -108,8 +108,13 @@ Here are three usage examples: ```python from swarms.structs import Flow +# Select any Language model from the models folder +from swarms.models import Mistral, OpenAIChat -flow = Flow(llm=my_language_model, max_loops=5) +llm = Mistral() +# llm = OpenAIChat() + +flow = Flow(llm=llm, max_loops=5) # Define a starting task or message initial_task = "Generate an long form analysis on the transformer model architecture." @@ -126,7 +131,7 @@ from swarms.structs import Flow def stop_when_repeats(response: str) -> bool: return "Stop" in response.lower() -flow = Flow(llm=my_language_model, max_loops=5, stopping_condition=stop_when_repeats) +flow = Flow(llm=llm, max_loops=5, stopping_condition=stop_when_repeats) ``` ### Example 3: Interactive Conversation @@ -134,7 +139,7 @@ flow = Flow(llm=my_language_model, max_loops=5, stopping_condition=stop_when_rep ```python from swarms.structs import Flow -flow = Flow(llm=my_language_model, max_loops=5, interactive=True) +flow = Flow(llm=llm, max_loops=5, interactive=True) # Provide initial task initial_task = "Rank and prioritize the following financial documents and cut out 30% of our expenses" diff --git a/docs/swarms/structs/sequential_workflow.md b/docs/swarms/structs/sequential_workflow.md new file mode 100644 index 00000000..12b38409 --- /dev/null +++ b/docs/swarms/structs/sequential_workflow.md @@ -0,0 +1,614 @@ +# `SequentialWorkflow` Documentation + +The **SequentialWorkflow** class is a Python module designed to facilitate the execution of a sequence of tasks in a sequential manner. It is a part of the `swarms.structs` package and is particularly useful for orchestrating the execution of various callable objects, such as functions or models, in a predefined order. This documentation will provide an in-depth understanding of the **SequentialWorkflow** class, including its purpose, architecture, usage, and examples. + +## Purpose and Relevance + +The **SequentialWorkflow** class is essential for managing and executing a series of tasks or processes, where each task may depend on the outcome of the previous one. It is commonly used in various application scenarios, including but not limited to: + +1. **Natural Language Processing (NLP) Workflows:** In NLP workflows, multiple language models are employed sequentially to process and generate text. Each model may depend on the results of the previous one, making sequential execution crucial. + +2. **Data Analysis Pipelines:** Data analysis often involves a series of tasks such as data preprocessing, transformation, and modeling steps. These tasks must be performed sequentially to ensure data consistency and accuracy. + +3. **Task Automation:** In task automation scenarios, there is a need to execute a series of automated tasks in a specific order. Sequential execution ensures that each task is performed in a predefined sequence, maintaining the workflow's integrity. 
+ +By providing a structured approach to managing these tasks, the **SequentialWorkflow** class helps developers streamline their workflow execution and improve code maintainability. + +## Key Concepts and Terminology + +Before delving into the details of the **SequentialWorkflow** class, let's define some key concepts and terminology that will be used throughout the documentation: + +### Task + +A **task** refers to a specific unit of work that needs to be executed as part of the workflow. Each task is associated with a description and can be implemented as a callable object, such as a function or a model. + +### Flow + +A **flow** represents a callable object that can be a task within the **SequentialWorkflow**. Flows encapsulate the logic and functionality of a particular task. Flows can be functions, models, or any callable object that can be executed. + +### Sequential Execution + +Sequential execution refers to the process of running tasks one after the other in a predefined order. In a **SequentialWorkflow**, tasks are executed sequentially, meaning that each task starts only after the previous one has completed. + +### Workflow + +A **workflow** is a predefined sequence of tasks that need to be executed in a specific order. It represents the overall process or pipeline that the **SequentialWorkflow** manages. + +### Dashboard (Optional) + +A **dashboard** is an optional feature of the **SequentialWorkflow** that provides real-time monitoring and visualization of the workflow's progress. It displays information such as the current task being executed, task results, and other relevant metadata. + +### Max Loops + +The **maximum number of times** the entire workflow can be run. This parameter allows developers to control how many times the workflow is executed. + +### Autosaving + +**Autosaving** is a feature that allows the **SequentialWorkflow** to automatically save its state to a file at specified intervals. This feature helps in resuming a workflow from where it left off, even after interruptions. + +Now that we have a clear understanding of the key concepts and terminology, let's explore the architecture and usage of the **SequentialWorkflow** class in more detail. + +## Architecture of SequentialWorkflow + +The architecture of the **SequentialWorkflow** class is designed to provide a structured and flexible way to define, manage, and execute a sequence of tasks. It comprises the following core components: + +1. **Task**: The **Task** class represents an individual unit of work within the workflow. Each task has a description, which serves as a human-readable identifier for the task. Tasks can be implemented as callable objects, allowing for great flexibility in defining their functionality. + +2. **Workflow**: The **SequentialWorkflow** class itself represents the workflow. It manages a list of tasks in the order they should be executed. Workflows can be run sequentially or asynchronously, depending on the use case. + +3. **Task Execution**: Task execution is the process of running each task in the workflow. Tasks are executed one after another in the order they were added to the workflow. Task results can be passed as inputs to subsequent tasks. + +4. **Dashboard (Optional)**: The **SequentialWorkflow** optionally includes a dashboard feature. The dashboard provides a visual interface for monitoring the progress of the workflow. It displays information about the current task, task results, and other relevant metadata. + +5. 
**State Management**: The **SequentialWorkflow** supports state management, allowing developers to save and load the state of the workflow to and from JSON files. This feature is valuable for resuming workflows after interruptions or for sharing workflow configurations. + +## Usage of SequentialWorkflow + +The **SequentialWorkflow** class is versatile and can be employed in a wide range of applications. Its usage typically involves the following steps: + +1. **Initialization**: Begin by initializing any callable objects or flows that will serve as tasks in the workflow. These callable objects can include functions, models, or any other Python objects that can be executed. + +2. **Workflow Creation**: Create an instance of the **SequentialWorkflow** class. Specify the maximum number of loops the workflow should run and whether a dashboard should be displayed. + +3. **Task Addition**: Add tasks to the workflow using the `add` method. Each task should be described using a human-readable description, and the associated flow (callable object) should be provided. Additional arguments and keyword arguments can be passed to the task. + +4. **Task Execution**: Execute the workflow using the `run` method. The tasks within the workflow will be executed sequentially, with task results passed as inputs to subsequent tasks. + +5. **Accessing Results**: After running the workflow, you can access the results of each task using the `get_task_results` method or by directly accessing the `result` attribute of each task. + +6. **Optional Features**: Optionally, you can enable features such as autosaving of the workflow state and utilize the dashboard for real-time monitoring. + + +## Installation + +Before using the Sequential Workflow library, you need to install it. You can install it via pip: + +```bash +pip3 install --upgrade swarms +``` + +## Quick Start + +Let's begin with a quick example to demonstrate how to create and run a Sequential Workflow. In this example, we'll create a workflow that generates a 10,000-word blog on "health and wellness" using an AI model and then summarizes the generated content. + +```python +from swarms.models import OpenAIChat +from swarms.structs import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow + +# Initialize the language model flow (e.g., GPT-3) +llm = OpenAIChat( + openai_api_key="YOUR_API_KEY", + temperature=0.5, + max_tokens=3000, +) + +# Initialize flows for individual tasks +flow1 = Flow(llm=llm, max_loops=1, dashboard=False) +flow2 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create the Sequential Workflow +workflow = SequentialWorkflow(max_loops=1) + +# Add tasks to the workflow +workflow.add("Generate a 10,000 word blog on health and wellness.", flow1) +workflow.add("Summarize the generated blog", flow2) + +# Run the workflow +workflow.run() + +# Output the results +for task in workflow.tasks: + print(f"Task: {task.description}, Result: {task.result}") +``` + +This quick example demonstrates the basic usage of the Sequential Workflow. It creates two tasks and executes them sequentially. + +## Class: `Task` + +### Description + +The `Task` class represents an individual task in the workflow. A task is essentially a callable object, such as a function or a class, that can be executed sequentially. Tasks can have arguments and keyword arguments. 
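+
+For instance, a plain Python function can serve as the callable behind a task. The sketch below assumes `Task` can be imported from the same module as `SequentialWorkflow`; the function and its argument are illustrative, and the constructor is detailed next:
+
+```python
+from swarms.structs.sequential_workflow import Task
+
+def summarize(text: str) -> str:
+    # Toy stand-in for a real model call
+    return text[:50] + "..."
+
+task = Task(description="Summarize the draft", flow=summarize, args=["A very long draft about health and wellness..."])
+task.execute()
+print(task.result)
+```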
+ +### Class Definition + +```python +class Task: + def __init__(self, description: str, flow: Union[Callable, Flow], args: List[Any] = [], kwargs: Dict[str, Any] = {}, result: Any = None, history: List[Any] = []) +``` + +### Parameters + +- `description` (str): A description of the task. +- `flow` (Union[Callable, Flow]): The callable object representing the task. It can be a function, class, or a `Flow` instance. +- `args` (List[Any]): A list of positional arguments to pass to the task when executed. Default is an empty list. +- `kwargs` (Dict[str, Any]): A dictionary of keyword arguments to pass to the task when executed. Default is an empty dictionary. +- `result` (Any): The result of the task's execution. Default is `None`. +- `history` (List[Any]): A list to store the historical results of the task. Default is an empty list. + +### Methods + +#### `execute()` + +Execute the task. + +```python +def execute(self): +``` + +This method executes the task and updates the `result` and `history` attributes of the task. It checks if the task is a `Flow` instance and if the 'task' argument is needed. + +## Class: `SequentialWorkflow` + +### Description + +The `SequentialWorkflow` class is responsible for managing a sequence of tasks and executing them in a sequential order. It provides methods for adding tasks, running the workflow, and managing the state of the tasks. + +### Class Definition + +```python +class SequentialWorkflow: + def __init__(self, max_loops: int = 1, autosave: bool = False, saved_state_filepath: Optional[str] = "sequential_workflow_state.json", restore_state_filepath: Optional[str] = None, dashboard: bool = False, tasks: List[Task] = []) +``` + +### Parameters + +- `max_loops` (int): The maximum number of times to run the workflow sequentially. Default is `1`. +- `autosave` (bool): Whether to enable autosaving of the workflow state. Default is `False`. +- `saved_state_filepath` (Optional[str]): The file path to save the workflow state when autosave is enabled. Default is `"sequential_workflow_state.json"`. +- `restore_state_filepath` (Optional[str]): The file path to restore the workflow state when initializing. Default is `None`. +- `dashboard` (bool): Whether to display a dashboard with workflow information. Default is `False`. +- `tasks` (List[Task]): A list of `Task` instances representing the tasks in the workflow. Default is an empty list. + +### Methods + +#### `add(task: str, flow: Union[Callable, Flow], *args, **kwargs)` + +Add a task to the workflow. + +```python +def add(self, task: str, flow: Union[Callable, Flow], *args, **kwargs) -> None: +``` + +This method adds a new task to the workflow. You can provide a description of the task, the callable object (function, class, or `Flow` instance), and any additional positional or keyword arguments required for the task. + +#### `reset_workflow()` + +Reset the workflow by clearing the results of each task. + +```python +def reset_workflow(self) -> None: +``` + +This method clears the results of each task in the workflow, allowing you to start fresh without reinitializing the workflow. + +#### `get_task_results()` + +Get the results of each task in the workflow. + +```python +def get_task_results(self) -> Dict[str, Any]: +``` + +This method returns a dictionary containing the results of each task in the workflow, where the keys are task descriptions, and the values are the corresponding results. + +#### `remove_task(task_description: str)` + +Remove a task from the workflow. 
+
+```python
+def remove_task(self, task_description: str) -> None:
+```
+
+This method removes a specific task from the workflow based on its description.
+
+#### `update_task(task_description: str, **updates)`
+
+Update the arguments of a task in the workflow.
+
+```python
+def update_task(self, task_description: str, **updates) -> None:
+```
+
+This method allows you to update the arguments and keyword arguments of a task in the workflow. You specify the task's description and provide the updates as keyword arguments.
+
+#### `save_workflow_state(filepath: Optional[str] = "sequential_workflow_state.json", **kwargs)`
+
+Save the workflow state to a JSON file.
+
+```python
+def save_workflow_state(self, filepath: Optional[str] = "sequential_workflow_state.json", **kwargs) -> None:
+```
+
+This method saves the current state of the workflow, including the results and history of each task, to a JSON file. You can specify the file path for saving the state.
+
+#### `load_workflow_state(filepath: str = None, **kwargs)`
+
+Load the workflow state from a JSON file and restore the workflow state.
+
+```python
+def load_workflow_state(self, filepath: str = None, **kwargs) -> None:
+```
+
+This method loads a previously saved workflow state from a JSON file and restores the state, allowing you to continue the workflow from where it was saved. You can specify the file path for loading the state.
+
+#### `run()`
+
+Run the workflow sequentially.
+
+```python
+def run(self) -> None:
+```
+
+This method executes the tasks in the workflow sequentially. It checks if a task is a `Flow` instance and handles the flow of data between tasks accordingly.
+
+#### `arun()`
+
+Asynchronously run the workflow.
+
+```python
+async def arun(self) -> None:
+```
+
+This method asynchronously executes the tasks in the workflow sequentially. It's suitable for use cases where asynchronous execution is required. It also handles data flow between tasks.
+
+#### `workflow_bootup(**kwargs)`
+
+Display a bootup message for the workflow.
+
+```python
+def workflow_bootup(self, **kwargs) -> None:
+```
+
+This method displays a bootup message when the workflow is initialized. You can customize the message by providing additional keyword arguments.
+
+#### `workflow_dashboard(**kwargs)`
+
+Display a dashboard for the workflow.
+
+```python
+def workflow_dashboard(self, **kwargs) -> None:
+```
+
+This method displays a dashboard with information about the workflow, such as the number of tasks, maximum loops, and autosave settings. You can customize the dashboard by providing additional keyword arguments.
+
+## Examples
+
+Let's explore some examples to illustrate how to use the Sequential Workflow library effectively.
+
+### Example 1: Adding Tasks to a Sequential Workflow
+
+In this example, we'll create a Sequential Workflow and add tasks to it.
+ +```python +from swarms.models import OpenAIChat +from swarms.structs import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow + +# Example usage +api_key = ( + "" # Your actual API key here +) + +# Initialize the language flow +llm = OpenAIChat( + openai_api_key=api_key, + temperature=0.5, + max_tokens=3000, +) + +# Initialize Flows for individual tasks +flow1 = Flow(llm=llm, max_loops=1, dashboard=False) +flow2 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create the Sequential Workflow +workflow = SequentialWorkflow(max_loops=1) + +# Add tasks to the workflow +workflow.add("Generate a 10,000 word blog on health and wellness.", flow1) +workflow.add("Summarize the generated blog", flow2) + +# Output the list of tasks in the workflow +print("Tasks in the workflow:") +for task in workflow.tasks: + print(f"Task: {task.description}") +``` + +In this example, we create a Sequential Workflow and add two tasks to it. + +### Example 2: Resetting a Sequential Workflow + +In this example, we'll create a Sequential Workflow, add tasks to it, and then reset it. + +```python +from swarms.models import OpenAIChat +from swarms.structs import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow + +# Example usage +api_key = ( + "" # Your actual API key here +) + +# Initialize the language flow +llm = OpenAIChat( + openai_api_key=api_key, + temperature=0.5, + max_tokens=3000, +) + +# Initialize Flows for individual tasks +flow1 = Flow(llm=llm, max_loops=1, dashboard=False) +flow2 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create the Sequential Workflow +workflow = SequentialWorkflow(max_loops=1) + +# Add tasks to the workflow +workflow.add("Generate a 10,000 word blog on health and wellness.", flow1) +workflow.add("Summarize the generated blog", flow2) + +# Reset the workflow +workflow.reset_workflow() + +# Output the list of tasks in the workflow after resetting +print("Tasks in the workflow after resetting:") +for task in workflow.tasks: + print(f"Task: {task.description}") +``` + +In this example, we create a Sequential Workflow, add two tasks to it, and then reset the workflow, clearing all task results. + +### Example 3: Getting Task Results from a Sequential Workflow + +In this example, we'll create a Sequential Workflow, add tasks to it, run the workflow, and then retrieve the results of each task. + +```python +from swarms.models import OpenAIChat +from swarms.structs import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow + +# Example usage +api_key = ( + "" # Your actual API key here +) + +# Initialize the language flow +llm = OpenAIChat( + openai_api_key=api_key, + temperature=0.5, + max_tokens=3000, +) + +# Initialize Flows for individual tasks +flow1 = Flow(llm=llm, max_loops=1, dashboard=False) +flow2 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create the Sequential Workflow +workflow = SequentialWorkflow(max_loops=1) + +# Add tasks to the workflow +workflow.add("Generate a 10,000 word blog on health and wellness.", flow1) +workflow.add("Summarize the generated blog", flow2) + +# Run the workflow +workflow.run() + +# Get and display the results of each task in the workflow +results = workflow.get_task_results() +for task_description, result in results.items(): + print(f"Task: {task_description}, Result: {result}") +``` + +In this example, we create a Sequential Workflow, add two tasks to it, run the workflow, and then retrieve and display the results of each task. 
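+
+Task results do not have to stay in memory. Continuing from the workflow built above, the sketch below persists the state with `save_workflow_state` and restores it into a fresh workflow with `load_workflow_state`; the file path is arbitrary:
+
+```python
+# Persist the current state, including each task's result and history
+workflow.save_workflow_state("my_workflow_state.json")
+
+# Later, or in a new session, restore the saved state into a fresh workflow
+restored_workflow = SequentialWorkflow(max_loops=1)
+restored_workflow.load_workflow_state("my_workflow_state.json")
+print(restored_workflow.get_task_results())
+```
+
+Alternatively, creating the workflow with `autosave=True` writes the state file automatically at the default `saved_state_filepath`.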
+
+### Example 4: Removing a Task from a Sequential Workflow
+
+In this example, we'll create a Sequential Workflow, add tasks to it, and then remove a specific task from the workflow.
+
+```python
+from swarms.models import OpenAIChat
+from swarms.structs import Flow
+from swarms.structs.sequential_workflow import SequentialWorkflow
+
+# Example usage
+api_key = (
+    ""  # Your actual API key here
+)
+
+# Initialize the language flow
+llm = OpenAIChat(
+    openai_api_key=api_key,
+    temperature=0.5,
+    max_tokens=3000,
+)
+
+# Initialize Flows for individual tasks
+flow1 = Flow(llm=llm, max_loops=1, dashboard=False)
+flow2 = Flow(llm=llm, max_loops=1, dashboard=False)
+
+# Create the Sequential Workflow
+workflow = SequentialWorkflow(max_loops=1)
+
+# Add tasks to the workflow
+workflow.add("Generate a 10,000 word blog on health and wellness.", flow1)
+workflow.add("Summarize the generated blog", flow2)
+
+# Remove a specific task from the workflow
+workflow.remove_task("Generate a 10,000 word blog on health and wellness.")
+
+# Output the list of tasks in the workflow after removal
+print("Tasks in the workflow after removing a task:")
+for task in workflow.tasks:
+    print(f"Task: {task.description}")
+```
+
+In this example, we create a Sequential Workflow, add two tasks to it, and then remove a specific task from the workflow.
+
+### Example 5: Updating Task Arguments in a Sequential Workflow
+
+In this example, we'll create a Sequential Workflow, add tasks to it, and then update the arguments of a specific task in the workflow.
+
+```python
+from swarms.models import OpenAIChat
+from swarms.structs import Flow
+from swarms.structs.sequential_workflow import SequentialWorkflow
+
+# Example usage
+api_key = (
+    ""  # Your actual API key here
+)
+
+# Initialize the language flow
+llm = OpenAIChat(
+    openai_api_key=api_key,
+    temperature=0.5,
+    max_tokens=3000,
+)
+
+# Initialize Flows for individual tasks
+flow1 = Flow(llm=llm, max_loops=1, dashboard=False)
+flow2 = Flow(llm=llm, max_loops=1, dashboard=False)
+
+# Create the Sequential Workflow
+workflow = SequentialWorkflow(max_loops=1)
+
+# Add tasks to the workflow
+workflow.add("Generate a 10,000 word blog on health and wellness.", flow1)
+workflow.add("Summarize the generated blog", flow2)
+
+# Update the arguments of a specific task in the workflow
+workflow.update_task("Generate a 10,000 word blog on health and wellness.", max_loops=2)
+
+# Output the list of tasks in the workflow after updating task arguments
+print("Tasks in the workflow after updating task arguments:")
+for task in workflow.tasks:
+    print(f"Task: {task.description}, Arguments: {task.arguments}")
+```
+
+In this example, we create a Sequential Workflow, add two tasks to it, and then update the arguments of a specific task in the workflow.
+
+These examples demonstrate various operations and use cases for working with a Sequential Workflow.
+
+# Why `SequentialWorkflow`?
+
+## Enhancing Autonomous Agent Development
+
+The development of autonomous agents, whether they are conversational AI, robotic systems, or any other AI-driven application, often involves complex workflows that require a sequence of tasks to be executed in a specific order. Managing and orchestrating these tasks efficiently is crucial for building reliable and effective agents. The Sequential Workflow module serves as a valuable tool for AI engineers in achieving this goal.
+ +## Reliability and Coordination + +One of the primary challenges in autonomous agent development is ensuring that tasks are executed in the correct sequence and that the results of one task can be used as inputs for subsequent tasks. The Sequential Workflow module simplifies this process by allowing AI engineers to define and manage workflows in a structured and organized manner. + +By using the Sequential Workflow module, AI engineers can achieve the following benefits: + +### 1. Improved Reliability + +Reliability is a critical aspect of autonomous agents. The ability to handle errors gracefully and recover from failures is essential for building robust systems. The Sequential Workflow module offers a systematic approach to task execution, making it easier to handle errors, retry failed tasks, and ensure that the agent continues to operate smoothly. + +### 2. Task Coordination + +Coordinating tasks in the correct order is essential for achieving the desired outcome. The Sequential Workflow module enforces task sequencing, ensuring that each task is executed only when its dependencies are satisfied. This eliminates the risk of executing tasks out of order, which can lead to incorrect results. + +### 3. Code Organization + +Managing complex workflows can become challenging without proper organization. The Sequential Workflow module encourages AI engineers to structure their code in a modular and maintainable way. Each task can be encapsulated as a separate unit, making it easier to understand, modify, and extend the agent's behavior. + +### 4. Workflow Visualization + +Visualization is a powerful tool for understanding and debugging workflows. The Sequential Workflow module can be extended to include a visualization dashboard, allowing AI engineers to monitor the progress of tasks, track results, and identify bottlenecks or performance issues. + +## TODO: Future Features + +While the Sequential Workflow module offers significant advantages, there are opportunities for further enhancement. Here is a list of potential features and improvements that can be added to make it even more versatile and adaptable for various AI engineering tasks: + +### 1. Asynchronous Support + +Adding support for asynchronous task execution can improve the efficiency of workflows, especially when dealing with tasks that involve waiting for external events or resources. + +### 2. Context Managers + +Introducing context manager support for tasks can simplify resource management, such as opening and closing files, database connections, or network connections within a task's context. + +### 3. Workflow History + +Maintaining a detailed history of workflow execution, including timestamps, task durations, and input/output data, can facilitate debugging and performance analysis. + +### 4. Parallel Processing + +Enhancing the module to support parallel processing with a pool of workers can significantly speed up the execution of tasks, especially for computationally intensive workflows. + +### 5. Error Handling Strategies + +Providing built-in error handling strategies, such as retries, fallbacks, and custom error handling functions, can make the module more robust in handling unexpected failures. + +## Conclusion + +The Sequential Workflow module is a valuable tool for AI engineers working on autonomous agents and complex AI-driven applications. It offers a structured and reliable approach to defining and executing workflows, ensuring that tasks are performed in the correct sequence. 
By using this module, AI engineers can enhance the reliability, coordination, and maintainability of their agents. + +As the field of AI continues to evolve, the demand for efficient workflow management tools will only increase. The Sequential Workflow module is a step towards meeting these demands and empowering AI engineers to create more reliable and capable autonomous agents. With future enhancements and features, it has the potential to become an indispensable asset in the AI engineer's toolkit. + +In summary, the Sequential Workflow module provides a foundation for orchestrating complex tasks and workflows, enabling AI engineers to focus on designing intelligent agents that can perform tasks with precision and reliability. + + +## Frequently Asked Questions (FAQs) + +### Q1: What is the difference between a task and a flow in Sequential Workflows? + +**A1:** In Sequential Workflows, a **task** refers to a specific unit of work that needs to be executed. It can be implemented as a callable object, such as a Python function, and is the fundamental building block of a workflow. + +A **flow**, on the other hand, is an encapsulation of a task within the workflow. Flows define the order in which tasks are executed and can be thought of as task containers. They allow you to specify dependencies, error handling, and other workflow-related configurations. + +### Q2: Can I run tasks in parallel within a Sequential Workflow? + +**A2:** Yes, you can run tasks in parallel within a Sequential Workflow by using parallel execution techniques. This advanced feature allows you to execute multiple tasks concurrently, improving performance and efficiency. You can explore this feature further in the guide's section on "Parallel Execution." + +### Q3: How do I handle errors within Sequential Workflows? + +**A3:** Error handling within Sequential Workflows can be implemented by adding error-handling logic within your task functions. You can catch exceptions and handle errors gracefully, ensuring that your workflow can recover from unexpected scenarios. The guide also covers more advanced error handling strategies, such as retrying failed tasks and handling specific error types. + +### Q4: What are some real-world use cases for Sequential Workflows? + +**A4:** Sequential Workflows can be applied to a wide range of real-world use cases, including: + +- **Data ETL (Extract, Transform, Load) Processes:** Automating data pipelines that involve data extraction, transformation, and loading into databases or data warehouses. + +- **Batch Processing:** Running batch jobs that process large volumes of data or perform data analysis. + +- **Automation of DevOps Tasks:** Streamlining DevOps processes such as deployment, provisioning, and monitoring. + +- **Cross-system Integrations:** Automating interactions between different systems, services, or APIs. + +- **Report Generation:** Generating reports and documents automatically based on data inputs. + +- **Workflow Orchestration:** Orchestrating complex workflows involving multiple steps and dependencies. + +- **Resource Provisioning:** Automatically provisioning and managing cloud resources. + +These are just a few examples, and Sequential Workflows can be tailored to various automation needs across industries. 
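+
+To make the error-handling guidance from Q3 concrete, the sketch below shows a task callable that retries a flaky step and falls back to a sentinel value instead of raising; `call_model` is a hypothetical stand-in for whatever model or API call the task wraps:
+
+```python
+def resilient_task(prompt: str, retries: int = 3) -> str:
+    """Toy task callable that retries a flaky operation and degrades gracefully."""
+    for attempt in range(retries):
+        try:
+            return call_model(prompt)  # hypothetical stand-in for a real model or API call
+        except Exception as exc:
+            print(f"Attempt {attempt + 1} failed: {exc}")
+    return "TASK_FAILED"  # fallback result that downstream tasks can check for
+```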
diff --git a/docs/swarms/swarms/groupchat.md b/docs/swarms/swarms/groupchat.md new file mode 100644 index 00000000..b881513f --- /dev/null +++ b/docs/swarms/swarms/groupchat.md @@ -0,0 +1,167 @@ +# Swarms Framework Documentation + +--- + +## Overview + +The Swarms framework is a Python library designed to facilitate the creation and management of a simulated group chat environment. This environment can be used for a variety of purposes, such as training conversational agents, role-playing games, or simulating dialogues for machine learning purposes. The core functionality revolves around managing the flow of messages between different agents within the chat, as well as handling the selection and responses of these agents based on the conversation's context. + +### Purpose + +The purpose of the Swarms framework, and specifically the `GroupChat` and `GroupChatManager` classes, is to simulate a dynamic and interactive conversation between multiple agents. This simulates a real-time chat environment where each participant is represented by an agent with a specific role and behavioral patterns. These agents interact within the rules of the group chat, controlled by the `GroupChatManager`. + +### Key Features + +- **Agent Interaction**: Allows multiple agents to communicate within a group chat scenario. +- **Message Management**: Handles the storage and flow of messages within the group chat. +- **Role Play**: Enables agents to assume specific roles and interact accordingly. +- **Conversation Context**: Maintains the context of the conversation for appropriate responses by agents. + +--- + +## GroupChat Class + +The `GroupChat` class is the backbone of the Swarms framework's chat simulation. It maintains the list of agents participating in the chat, the messages that have been exchanged, and the logic to reset the chat and determine the next speaker. + +### Class Definition + +#### Parameters + +| Parameter | Type | Description | Default Value | +|------------|---------------------|--------------------------------------------------------------|---------------| +| agents | List[Flow] | List of agent flows participating in the group chat. | None | +| messages | List[Dict] | List of message dictionaries exchanged in the group chat. | None | +| max_round | int | Maximum number of rounds/messages allowed in the group chat. | 10 | +| admin_name | str | The name of the admin agent in the group chat. | "Admin" | + +#### Class Properties and Methods + +- `agent_names`: Returns a list of the names of the agents in the group chat. +- `reset()`: Clears all messages from the group chat. +- `agent_by_name(name: str) -> Flow`: Finds and returns an agent by name. +- `next_agent(agent: Flow) -> Flow`: Returns the next agent in the list. +- `select_speaker_msg() -> str`: Returns the message for selecting the next speaker. +- `select_speaker(last_speaker: Flow, selector: Flow) -> Flow`: Logic to select the next speaker based on the last speaker and the selector agent. +- `_participant_roles() -> str`: Returns a string listing all participant roles. +- `format_history(messages: List[Dict]) -> str`: Formats the history of messages for display or processing. 
+ +### Usage Examples + +#### Example 1: Initializing a GroupChat + +```python +from swarms.structs.flow import Flow +from swarms.groupchat import GroupChat + +# Assuming Flow objects (flow1, flow2, flow3) are initialized and configured +agents = [flow1, flow2, flow3] +group_chat = GroupChat(agents=agents, messages=[], max_round=10) +``` + +#### Example 2: Resetting a GroupChat + +```python +group_chat.reset() +``` + +#### Example 3: Selecting a Speaker + +```python +last_speaker = agents[0] # Assuming this is a Flow object representing the last speaker +selector = agents[1] # Assuming this is a Flow object with the selector role + +next_speaker = group_chat.select_speaker(last_speaker, selector) +``` + +--- + +## GroupChatManager Class + +The `GroupChatManager` class acts as a controller for the `GroupChat` instance. It orchestrates the interaction between agents, prompts for tasks, and manages the rounds of conversation. + +### Class Definition + +#### Constructor Parameters + +| Parameter | Type | Description | +|------------|-------------|------------------------------------------------------| +| groupchat | GroupChat | The GroupChat instance that the manager will handle. | +| selector | Flow | The Flow object that selects the next speaker. | + +#### Methods + +- `__call__(task: str)`: Invokes the GroupChatManager with a given task string to start the conversation. + +### Usage Examples + +#### Example 1: Initializing GroupChatManager + +```python +from swarms.groupchat import GroupChat, GroupChatManager +from swarms.structs.flow import Flow + +# Initialize your agents and group chat as shown in previous examples +chat_manager = GroupChatManager(groupchat=group_chat, selector=manager) +``` + +#### Example 2: Starting a Conversation + +```python +# Start the group chat with a task +chat_history = chat_manager("Start a conversation about space exploration.") +``` + +#### Example 3: Using the Call Method + +```python +# The call method is the same as starting a conversation +chat_history = chat_manager.__call__("Discuss recent advances in AI.") +``` + +--- + +## Conclusion + +In summary, the Swarms framework offers a unique and effective solution for simulating group chat environments. Its `GroupChat` and `GroupChatManager` classes provide the necessary infrastructure to create dynamic conversations between agents, manage messages, and maintain the context of the dialogue. This framework can be instrumental in developing more sophisticated conversational agents, experimenting with social dynamics in chat environments, and providing a rich dataset for machine learning applications. + +By leveraging the framework's features, users can create complex interaction scenarios that closely mimic real-world group communication. This can prove to be a valuable asset in the fields of artificial intelligence, computational social science, and beyond. + +--- + +### Frequently Asked Questions (FAQ) + +**Q: Can the Swarms framework handle real-time interactions between agents?** + +A: The Swarms framework is designed to simulate group chat environments. While it does not handle real-time interactions as they would occur on a network, it can simulate the flow of conversation in a way that mimics real-time communication. + +**Q: Is the Swarms framework capable of natural language processing?** + +A: The framework itself is focused on the structure and management of group chats. It does not inherently include natural language processing (NLP) capabilities. 
However, it can be integrated with NLP tools to enhance the simulation with language understanding and generation features. + +**Q: Can I customize the roles and behaviors of agents within the framework?** + +A: Yes, the framework is designed to be flexible. You can define custom roles and behaviors for agents to fit the specific requirements of your simulation scenario. + +**Q: What are the limitations of the Swarms framework?** + +A: The framework is constrained by its design to simulate text-based group chats. It is not suitable for voice or video communication simulations. Additionally, its effectiveness depends on the sophistication of the agents’ decision-making logic, which is outside the framework itself. + +**Q: Is it possible to integrate the Swarms framework with other chat services?** + +A: The framework does not integrate with external chat services out of the box. However, it could potentially be adapted to work with chat service APIs, where the agents could be used to simulate user behavior within a real chat application. + +**Q: How does the `GroupChatManager` select the next speaker?** + +A: The `GroupChatManager` uses a selection mechanism, which is typically based on the conversation's context and the roles of the agents, to determine the next speaker. The specifics of this mechanism can be customized to match the desired flow of the conversation. + +**Q: Can I contribute to the Swarms framework or suggest features?** + +A: As with many open-source projects, contributions and feature suggestions can usually be made through the project's repository on platforms like GitHub. It's best to check with the maintainers of the Swarms framework for their contribution guidelines. + +**Q: Are there any tutorials or community support for new users of the Swarms framework?** + +A: Documentation and usage examples are provided with the framework. Community support may be available through forums, chat groups, or the platform where the framework is hosted. Tutorials may also be available from third-party educators or in official documentation. + +**Q: What programming skills do I need to use the Swarms framework effectively?** + +A: You should have a good understanding of Python programming, including experience with classes and methods. Familiarity with the principles of agent-based modeling and conversational AI would also be beneficial. diff --git a/example.py b/example.py index e9dfac18..6c27bceb 100644 --- a/example.py +++ b/example.py @@ -1,24 +1,39 @@ from swarms.models import OpenAIChat -from swarms import Worker -from swarms.prompts import PRODUCT_AGENT_PROMPT +from swarms.structs import Flow api_key = "" +# Initialize the language model, this model can be swapped out with Anthropic, ETC, Huggingface Models like Mistral, ETC llm = OpenAIChat( + # model_name="gpt-4" openai_api_key=api_key, temperature=0.5, + # max_tokens=100, ) -node = Worker( + +## Initialize the workflow +flow = Flow( llm=llm, - ai_name="Optimus Prime", - openai_api_key=api_key, - ai_role=PRODUCT_AGENT_PROMPT, - external_tools=None, - human_in_the_loop=False, - temperature=0.5, + max_loops=5, + dashboard=True, + # tools = [search_api, slack, ] + # stopping_condition=None, # You can define a stopping condition as needed. + # loop_interval=1, + # retry_attempts=3, + # retry_interval=1, + # interactive=False, # Set to 'True' for interactive mode. + # dynamic_temperature=False, # Set to 'True' for dynamic temperature handling.
) -task = "Locate 5 trending topics on healthy living, locate a website like NYTimes, and then generate an image of people doing those topics." -response = node.run(task) -print(response) +# out = flow.load_state("flow_state.json") +# temp = flow.dynamic_temperature() +# filter = flow.add_response_filter("Trump") +out = flow.run( + "Generate a 10,000 word blog on mental clarity and the benefits of meditation." +) +# out = flow.validate_response(out) +# out = flow.analyze_feedback(out) +# out = flow.print_history_and_memory() +# # out = flow.save_state("flow_state.json") +# print(out) diff --git a/godmode.py b/godmode.py deleted file mode 100644 index f1269d98..00000000 --- a/godmode.py +++ /dev/null @@ -1,16 +0,0 @@ -from swarms.swarms import GodMode -from swarms.models import OpenAIChat - -api_key = "" - -llm = OpenAIChat(openai_api_key=api_key) - - -llms = [llm, llm, llm] - -god_mode = GodMode(llms) - -task = "Generate a 10,000 word blog on health and wellness." - -out = god_mode.run(task) -god_mode.print_responses(task) diff --git a/groupchat.py b/groupchat.py index 6694d71f..71d40a03 100644 --- a/groupchat.py +++ b/groupchat.py @@ -1,109 +1,49 @@ -# from swarms.structs import Flow -# from swarms.models import OpenAIChat -# from swarms.swarms.groupchat import GroupChat -# from swarms.agents import SimpleAgent +from swarms import OpenAI, Flow +from swarms.swarms.groupchat import GroupChatManager, GroupChat -# api_key = "" -# llm = OpenAIChat( -# openai_api_key=api_key, -# ) +api_key = "" -# agent1 = SimpleAgent("Captain Price", Flow(llm=llm, max_loops=4)) -# agent2 = SimpleAgent("John Mactavis", Flow(llm=llm, max_loops=4)) - -# # Create a groupchat with the 2 agents -# chat = GroupChat([agent1, agent2]) - -# # Assign duties to the agents -# chat.assign_duty(agent1.name, "Buy the groceries") -# chat.assign_duty(agent2.name, "Clean the house") - -# # Initate a chat -# response = chat.run("Captain Price", "Hello, how are you John?") -# print(response) - - -from swarms.models import OpenAIChat -from swarms.structs import Flow -import random - -api_key = "" # Your API Key here - - -class GroupChat: - """ - GroupChat class that facilitates agent-to-agent communication using multiple instances of the Flow class. - """ - - def __init__(self, agents: list): - self.agents = {f"agent_{i}": agent for i, agent in enumerate(agents)} - self.message_log = [] - - def add_agent(self, agent: Flow): - agent_id = f"agent_{len(self.agents)}" - self.agents[agent_id] = agent - - def remove_agent(self, agent_id: str): - if agent_id in self.agents: - del self.agents[agent_id] - - def send_message(self, sender_id: str, recipient_id: str, message: str): - if sender_id not in self.agents or recipient_id not in self.agents: - raise ValueError("Invalid sender or recipient ID.") - formatted_message = f"{sender_id} to {recipient_id}: {message}" - self.message_log.append(formatted_message) - recipient_agent = self.agents[recipient_id] - recipient_agent.run(message) - - def broadcast_message(self, sender_id: str, message: str): - for agent_id, agent in self.agents.items(): - if agent_id != sender_id: - self.send_message(sender_id, agent_id, message) - - def get_message_log(self): - return self.message_log - - -class EnhancedGroupChatV2(GroupChat): - def __init__(self, agents: list): - super().__init__(agents) - - def multi_round_conversation(self, rounds: int = 5): - """ - Initiate a multi-round conversation between agents. - - Args: - rounds (int): The number of rounds of conversation. 
- """ - for _ in range(rounds): - # Randomly select a sender and recipient agent for the conversation - sender_id = random.choice(list(self.agents.keys())) - recipient_id = random.choice(list(self.agents.keys())) - while recipient_id == sender_id: # Ensure the recipient is not the sender - recipient_id = random.choice(list(self.agents.keys())) - - # Generate a message (for simplicity, a generic message is used) - message = f"Hello {recipient_id}, how are you today?" - self.send_message(sender_id, recipient_id, message) - - -# Sample usage with EnhancedGroupChatV2 -# Initialize the language model -llm = OpenAIChat( +llm = OpenAI( openai_api_key=api_key, temperature=0.5, max_tokens=3000, ) -# Initialize two Flow agents -agent1 = Flow(llm=llm, max_loops=5, dashboard=True) -agent2 = Flow(llm=llm, max_loops=5, dashboard=True) +# Initialize the flow +flow1 = Flow( + llm=llm, + max_loops=1, + system_prompt="YOU ARE SILLY, YOU OFFER NOTHING OF VALUE", + name="silly", + dashboard=True, +) +flow2 = Flow( + llm=llm, + max_loops=1, + system_prompt="YOU ARE VERY SMART AND ANSWER RIDDLES", + name="detective", + dashboard=True, +) +flow3 = Flow( + llm=llm, + max_loops=1, + system_prompt="YOU MAKE RIDDLES", + name="riddler", + dashboard=True, +) +manager = Flow( + llm=llm, + max_loops=1, + system_prompt="YOU ARE A GROUP CHAT MANAGER", + name="manager", + dashboard=True, +) -# Create an enhanced group chat with the two agents -enhanced_group_chat_v2 = EnhancedGroupChatV2(agents=[agent1, agent2]) -# Simulate multi-round agent to agent communication -enhanced_group_chat_v2.multi_round_conversation(rounds=5) +# Example usage: +agents = [flow1, flow2, flow3] -enhanced_group_chat_v2.get_message_log() # Get the conversation log +group_chat = GroupChat(agents=agents, messages=[], max_round=10) +chat_manager = GroupChatManager(groupchat=group_chat, selector=manager) +chat_history = chat_manager("Write me a riddle") diff --git a/mkdocs.yml b/mkdocs.yml index bf155336..3a212201 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -61,34 +61,19 @@ nav: - Home: - Overview: "index.md" - Contributing: "contributing.md" - - FAQ: "faq.md" - - Purpose: "purpose.md" - - Roadmap: "roadmap.md" - - Weaknesses: "failures.md" - - Design: "design.md" - - Flywheel: "flywheel.md" - - Bounties: "bounties.md" - - Metric: "metric.md" - - Distribution: "distribution" - - Research: "research.md" - - Demos: "demos.md" - - Architecture: "architecture.md" - - Checklist: "checklist.md" - - Hiring: "hiring.md" - Swarms: - Overview: "swarms/index.md" - swarms.swarms: - AbstractSwarm: "swarms/swarms/abstractswarm.md" - AutoScaler: "swarms/swarms/autoscaler.md" - GodMode: "swarms/swarms/godmode.md" + - Groupchat: "swarms/swarms/groupchat.md" - swarms.workers: - - AbstractWorker: "swarms/workers/base.md" - Overview: "swarms/workers/index.md" - AbstractWorker: "swarms/workers/abstract_worker.md" - swarms.agents: - AbstractAgent: "swarms/agents/abstract_agent.md" - OmniModalAgent: "swarms/agents/omni_agent.md" - - Idea2Image: "swarms/agents/idea_to_image.md" - swarms.models: - Language: - Overview: "swarms/models/index.md" @@ -98,6 +83,7 @@ nav: - Zephyr: "swarms/models/zephyr.md" - BioGPT: "swarms/models/biogpt.md" - MPT7B: "swarms/models/mpt.md" + - Mistral: "swarms/models/mistral.md" - MultiModal: - Fuyu: "swarms/models/fuyu.md" - Vilt: "swarms/models/vilt.md" @@ -105,28 +91,45 @@ nav: - BingChat: "swarms/models/bingchat.md" - Kosmos: "swarms/models/kosmos.md" - Nougat: "swarms/models/nougat.md" + - Dalle3: "swarms/models/dalle3.md" + - GPT4V: 
"swarms/models/gpt4v.md" - LayoutLMDocumentQA: "swarms/models/layoutlm_document_qa.md" + - DistilWhisperModel: "swarms/models/distilled_whisperx.md" - swarms.structs: - Overview: "swarms/structs/overview.md" - Workflow: "swarms/structs/workflow.md" - Flow: "swarms/structs/flow.md" + - SequentialWorkflow: 'swarms/structs/sequential_workflow.md' - swarms.memory: - PineconeVectorStoreStore: "swarms/memory/pinecone.md" - PGVectorStore: "swarms/memory/pg.md" - swarms.chunkers: - BaseChunker: "swarms/chunkers/basechunker.md" - PdfChunker: "swarms/chunkers/pdf_chunker.md" -- Walkthroughs: +- Guides: - Overview: "examples/index.md" - - Structs: - - Flow: "examples/flow.md" - Agents: + - Flow: "examples/flow.md" + - SequentialWorkflow: "examples/reliable_autonomous_agents.md" - OmniAgent: "examples/omni_agent.md" - - Worker: - - Basic: "examples/worker.md" - - StackedWorker: "examples/stacked_worker.md" + - 2O+ Autonomous Agent Blogs: "examples/ideas.md" - Applications: - CustomerSupport: - Overview: "applications/customer_support.md" - Marketing: - Overview: "applications/marketing_agencies.md" +- Corporate: + - FAQ: "corporate/faq.md" + - Purpose: "corporate/purpose.md" + - Roadmap: "corporate/roadmap.md" + - Weaknesses: "corporate/failures.md" + - Design: "corporate/design.md" + - Flywheel: "corporate/flywheel.md" + - Bounties: "corporate/bounties.md" + - Metric: "corporate/metric.md" + - Distribution: "corporate/distribution" + - Research: "corporate/research.md" + - Demos: "corporate/demos.md" + - Architecture: "corporate/architecture.md" + - Checklist: "corporate/checklist.md" + - Hiring: "corporate/hiring.md" diff --git a/simple_agent.py b/playground/agents/simple_agent.py similarity index 100% rename from simple_agent.py rename to playground/agents/simple_agent.py diff --git a/playground/models/anthropic_example.py b/playground/models/anthropic_example.py new file mode 100644 index 00000000..940892ca --- /dev/null +++ b/playground/models/anthropic_example.py @@ -0,0 +1,9 @@ +from swarms.models.anthropic import Anthropic + + +model = Anthropic(anthropic_api_key="") + + +task = "What is quantum field theory? What are 3 books on the field?" 
+ +print(model(task)) diff --git a/playground/models/dalle3.jpeg b/playground/models/dalle3.jpeg new file mode 100644 index 00000000..39753795 Binary files /dev/null and b/playground/models/dalle3.jpeg differ diff --git a/playground/models/dalle3.py b/playground/models/dalle3.py new file mode 100644 index 00000000..ac9ba760 --- /dev/null +++ b/playground/models/dalle3.py @@ -0,0 +1,6 @@ +from swarms.models.dalle3 import Dalle3 + +model = Dalle3() + +task = "A painting of a dog" +img = model(task) diff --git a/playground/models/fuyu.py b/playground/models/fuyu.py deleted file mode 100644 index 6047855e..00000000 --- a/playground/models/fuyu.py +++ /dev/null @@ -1,4 +0,0 @@ -from swarms.models import Fuyu - -fuyu = Fuyu() -fuyu("Hello, my name is", "images/github-banner-swarms.png") diff --git a/playground/models/fuyu_example.py b/playground/models/fuyu_example.py new file mode 100644 index 00000000..612c002e --- /dev/null +++ b/playground/models/fuyu_example.py @@ -0,0 +1,7 @@ +from swarms.models.fuyu import Fuyu + +img = "dalle3.jpeg" + +fuyu = Fuyu() + +fuyu("What is this image", img) diff --git a/playground/models/gpt4_v.py b/playground/models/gpt4_v.py new file mode 100644 index 00000000..5e5d7c95 --- /dev/null +++ b/playground/models/gpt4_v.py @@ -0,0 +1,15 @@ +from swarms.models.gpt4v import GPT4Vision + +api_key = "" + +gpt4vision = GPT4Vision( + openai_api_key=api_key, +) + +img = "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/VFPt_Solenoid_correct2.svg/640px-VFPt_Solenoid_correct2.svg.png" + +task = "What is this image" + +answer = gpt4vision.run(task, img) + +print(answer) diff --git a/playground/models/gpt4vision_example.py b/playground/models/gpt4vision_example.py new file mode 100644 index 00000000..7306fc56 --- /dev/null +++ b/playground/models/gpt4vision_example.py @@ -0,0 +1,7 @@ +from swarms.models.gpt4v import GPT4Vision + +gpt4vision = GPT4Vision(api_key="") +task = "What is the following image about?" +img = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + +answer = gpt4vision.run(task, img) diff --git a/playground/models/multitemp.py b/playground/models/multitemp.py deleted file mode 100644 index f4146390..00000000 --- a/playground/models/multitemp.py +++ /dev/null @@ -1,56 +0,0 @@ -from swarms.models import OpenAIChat # Replace with your actual OpenAIChat import - -if __name__ == "__main__": - api_key = "" # Your OpenAI API key here - agent = MultiTempAgent(api_key) - - prompt = "Write a blog post about health and wellness" - final_output = agent.run(prompt) - - print("Final chosen output:") - print(final_output) - - -class MultiTempAgent: - def __init__(self, api_key, default_temp=0.5, alt_temps=[0.2, 0.7, 0.9]): - self.api_key = api_key - self.default_temp = default_temp - self.alt_temps = alt_temps - - def ask_user_feedback(self, text): - print(f"Generated text: {text}") - feedback = input("Are you satisfied with this output? 
(yes/no): ") - return feedback.lower() == "yes" - - def present_options_to_user(self, outputs): - print("Alternative outputs:") - for temp, output in outputs.items(): - print(f"Temperature {temp}: {output}") - chosen_temp = float(input("Choose the temperature of the output you like: ")) - return outputs.get(chosen_temp, "Invalid temperature chosen.") - - def run(self, prompt): - try: - llm = OpenAIChat(openai_api_key=self.api_key, temperature=self.default_temp) - initial_output = llm(prompt) # Using llm as a callable - except Exception as e: - print(f"Error generating initial output: {e}") - initial_output = None - - user_satisfied = self.ask_user_feedback(initial_output) - - if user_satisfied: - return initial_output - else: - outputs = {} - for temp in self.alt_temps: - try: - llm = OpenAIChat( - openai_api_key=self.api_key, temperature=temp - ) # Re-initializing - outputs[temp] = llm(prompt) # Using llm as a callable - except Exception as e: - print(f"Error generating text at temperature {temp}: {e}") - outputs[temp] = None - chosen_output = self.present_options_to_user(outputs) - return chosen_output diff --git a/playground/models/openai_model.py b/playground/models/openai_model.py index eccbb8cc..3b9cb967 100644 --- a/playground/models/openai_model.py +++ b/playground/models/openai_model.py @@ -2,5 +2,5 @@ from swarms.models.openai_models import OpenAIChat openai = OpenAIChat(openai_api_key="", verbose=False) -chat = openai("Are quantum fields everywhere?") +chat = openai("What are quantum fields?") print(chat) diff --git a/playground/structs/flow.py b/playground/structs/flow.py index e69de29b..8e34cce3 100644 --- a/playground/structs/flow.py +++ b/playground/structs/flow.py @@ -0,0 +1,35 @@ +from swarms.models import OpenAIChat +from swarms.structs import Flow + +api_key = "" + +# Initialize the language model, this model can be swapped out with Anthropic, ETC, Huggingface Models like Mistral, ETC +llm = OpenAIChat( + # model_name="gpt-4" + openai_api_key=api_key, + temperature=0.5, + # max_tokens=100, +) + +## Initialize the workflow +flow = Flow( + llm=llm, + max_loops=2, + dashboard=True, + # stopping_condition=None, # You can define a stopping condition as needed. + # loop_interval=1, + # retry_attempts=3, + # retry_interval=1, + # interactive=False, # Set to 'True' for interactive mode. + # dynamic_temperature=False, # Set to 'True' for dynamic temperature handling. 
+) + +# out = flow.load_state("flow_state.json") +# temp = flow.dynamic_temperature() +# filter = flow.add_response_filter("Trump") +out = flow.run("Generate a 10,000 word blog on health and wellness.") +# out = flow.validate_response(out) +# out = flow.analyze_feedback(out) +# out = flow.print_history_and_memory() +# # out = flow.save_state("flow_state.json") +# print(out) diff --git a/playground/structs/sequential_workflow.py b/playground/structs/sequential_workflow.py new file mode 100644 index 00000000..b8e5a10b --- /dev/null +++ b/playground/structs/sequential_workflow.py @@ -0,0 +1,31 @@ +from swarms.models import OpenAIChat +from swarms.structs import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow + +# Example usage +llm = OpenAIChat( + temperature=0.5, + max_tokens=3000, +) + +# Initialize the Flow with the language flow +flow1 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create another Flow for a different task +flow2 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create the workflow +workflow = SequentialWorkflow(max_loops=1) + +# Add tasks to the workflow +workflow.add("Generate a 10,000 word blog on health and wellness.", flow1) + +# Suppose the next task takes the output of the first task as input +workflow.add("Summarize the generated blog", flow2) + +# Run the workflow +workflow.run() + +# Output the results +for task in workflow.tasks: + print(f"Task: {task.description}, Result: {task.result}") diff --git a/playground/swarms/godmode.py b/playground/swarms/godmode.py index 66aec1fa..f1269d98 100644 --- a/playground/swarms/godmode.py +++ b/playground/swarms/godmode.py @@ -1,39 +1,16 @@ +from swarms.swarms import GodMode from swarms.models import OpenAIChat -from swarms.swarms import GodMode -from swarms.workers.worker import Worker +api_key = "" + +llm = OpenAIChat(openai_api_key=api_key) -llm = OpenAIChat(model_name="gpt-4", openai_api_key="api-key", temperature=0.5) -worker1 = Worker( - llm=llm, - ai_name="Bumble Bee", - ai_role="Worker in a swarm", - external_tools=None, - human_in_the_loop=False, - temperature=0.5, -) -worker2 = Worker( - llm=llm, - ai_name="Optimus Prime", - ai_role="Worker in a swarm", - external_tools=None, - human_in_the_loop=False, - temperature=0.5, -) -worker3 = Worker( - llm=llm, - ai_name="Megatron", - ai_role="Worker in a swarm", - external_tools=None, - human_in_the_loop=False, - temperature=0.5, -) -# Usage -agents = [worker1, worker2, worker3] +llms = [llm, llm, llm] -god_mode = GodMode(agents) +god_mode = GodMode(llms) -task = "What are the biggest risks facing humanity?" +task = "Generate a 10,000 word blog on health and wellness." 
+out = god_mode.run(task) god_mode.print_responses(task) diff --git a/playground/swarms/groupchat.py b/playground/swarms/groupchat.py index a5e8dd0d..739181d1 100644 --- a/playground/swarms/groupchat.py +++ b/playground/swarms/groupchat.py @@ -1,61 +1,49 @@ -from swarms.models import OpenAIChat -from swarms.swarms import GroupChat, GroupChatManager -from swarms.workers import Worker +from swarms import OpenAI, Flow +from swarms.swarms.groupchat import GroupChatManager, GroupChat -llm = OpenAIChat(model_name="gpt-4", openai_api_key="api-key", temperature=0.5) -node = Worker( - llm=llm, - ai_name="Optimus Prime", - ai_role="Worker in a swarm", - external_tools=None, - human_in_the_loop=False, +api_key = "" + +llm = OpenAI( + openai_api_key=api_key, temperature=0.5, + max_tokens=3000, ) -node2 = Worker( +# Initialize the flow +flow1 = Flow( llm=llm, - ai_name="Optimus Prime", - ai_role="Worker in a swarm", - external_tools=None, - human_in_the_loop=False, - temperature=0.5, + max_loops=1, + system_message="YOU ARE SILLY, YOU OFFER NOTHING OF VALUE", + name="silly", + dashboard=True, ) - -node3 = Worker( +flow2 = Flow( llm=llm, - ai_name="Optimus Prime", - ai_role="Worker in a swarm", - external_tools=None, - human_in_the_loop=False, - temperature=0.5, + max_loops=1, + system_message="YOU ARE VERY SMART AND ANSWER RIDDLES", + name="detective", + dashboard=True, ) - -nodes = [node, node2, node3] - -messages = [ - { - "role": "system", - "context": "Create an a small feedforward in pytorch", - } -] - -group = GroupChat( - workers=nodes, - messages=messages, - max_rounds=3, +flow3 = Flow( + llm=llm, + max_loops=1, + system_message="YOU MAKE RIDDLES", + name="riddler", + dashboard=True, ) - - -manager = GroupChatManager( - groupchat=group, - max_consecutive_auto_reply=3, +manager = Flow( + llm=llm, + max_loops=1, + system_message="YOU ARE A GROUP CHAT MANAGER", + name="manager", + dashboard=True, ) -output = group.run( - messages, - sender=node, - config=group, -) -print(output) +# Example usage: +agents = [flow1, flow2, flow3] + +group_chat = GroupChat(agents=agents, messages=[], max_round=10) +chat_manager = GroupChatManager(groupchat=group_chat, selector=manager) +chat_history = chat_manager("Write me a riddle") diff --git a/playground/workflow.py b/playground/workflow.py index a5d0ea03..78909dc7 100644 --- a/playground/workflow.py +++ b/playground/workflow.py @@ -1,5 +1,5 @@ from swarms import Workflow -from swarms.tools.autogpt import ChatOpenAI +from swarms.models import ChatOpenAI workflow = Workflow(ChatOpenAI) diff --git a/pyproject.toml b/pyproject.toml index 4af20ee0..c44cf9dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "swarms" -version = "1.9.3" +version = "2.1.6" description = "Swarms - Pytorch" license = "MIT" authors = ["Kye Gomez "] @@ -28,7 +28,6 @@ openai = "*" langchain = "*" asyncio = "*" nest_asyncio = "*" -pegasusx = "*" einops = "*" google-generativeai = "*" torch = "*" @@ -38,21 +37,22 @@ duckduckgo-search = "*" faiss-cpu = "*" datasets = "*" diffusers = "*" +accelerate = "*" sentencepiece = "*" wget = "*" griptape = "*" httpx = "*" +tiktoken = "*" attrs = "*" ggl = "*" +ratelimit = "*" + beautifulsoup4 = "*" huggingface-hub = "*" pydantic = "*" tenacity = "*" -redis = "*" Pillow = "*" chromadb = "*" -agent-protocol = "*" -open-interpreter = "*" tabulate = "*" termcolor = "*" black = "*" diff --git a/requirements.txt b/requirements.txt index 7ff9d362..5cb854b9 100644 --- a/requirements.txt 
+++ b/requirements.txt @@ -28,12 +28,15 @@ google-generativeai sentencepiece duckduckgo-search agent-protocol +accelerate chromadb +tiktoken open-interpreter tabulate colored griptape addict +ratelimit albumentations basicsr termcolor diff --git a/sequential_workflow_example.py b/sequential_workflow_example.py new file mode 100644 index 00000000..9dc9c828 --- /dev/null +++ b/sequential_workflow_example.py @@ -0,0 +1,35 @@ +from swarms.models import OpenAIChat +from swarms.structs import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow + +# Example usage +api_key = "" + +# Initialize the language flow +llm = OpenAIChat( + openai_api_key=api_key, + temperature=0.5, + max_tokens=3000, +) + +# Initialize the Flow with the language flow +flow1 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create another Flow for a different task +flow2 = Flow(llm=llm, max_loops=1, dashboard=False) + +# Create the workflow +workflow = SequentialWorkflow(max_loops=1) + +# Add tasks to the workflow +workflow.add("Generate a 10,000 word blog on health and wellness.", flow1) + +# Suppose the next task takes the output of the first task as input +workflow.add("Summarize the generated blog", flow2) + +# Run the workflow +workflow.run() + +# Output the results +for task in workflow.tasks: + print(f"Task: {task.description}, Result: {task.result}") diff --git a/swarms/__init__.py b/swarms/__init__.py index dda0aff2..f45f876f 100644 --- a/swarms/__init__.py +++ b/swarms/__init__.py @@ -6,12 +6,9 @@ warnings.filterwarnings("ignore", category=UserWarning) # disable tensorflow warnings os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" - - -from swarms.workers import * -from swarms.workers.worker import Worker -from swarms.chunkers import * -from swarms.models import * # import * only works when __all__ = [] is defined in __init__.py -from swarms.structs import * -from swarms.swarms import * from swarms.agents import * +from swarms.swarms import * +from swarms.structs import * +from swarms.models import * +from swarms.chunkers import * +from swarms.workers import * diff --git a/swarms/agents/__init__.py b/swarms/agents/__init__.py index f622f3f8..2b67eec0 100644 --- a/swarms/agents/__init__.py +++ b/swarms/agents/__init__.py @@ -4,9 +4,9 @@ from swarms.agents.message import Message # from swarms.agents.stream_response import stream from swarms.agents.base import AbstractAgent from swarms.agents.registry import Registry -from swarms.agents.idea_to_image_agent import Idea2Image -from swarms.agents.simple_agent import SimpleAgent +# from swarms.agents.idea_to_image_agent import Idea2Image +from swarms.agents.simple_agent import SimpleAgent """Agent Infrastructure, models, memory, utils, tools""" @@ -16,6 +16,6 @@ __all__ = [ "Message", "AbstractAgent", "Registry", - "Idea2Image", + # "Idea2Image", "SimpleAgent", ] diff --git a/swarms/agents/agent.py b/swarms/agents/agent.py index 109501f9..bad9d3bb 100644 --- a/swarms/agents/agent.py +++ b/swarms/agents/agent.py @@ -34,7 +34,6 @@ from langchain_experimental.autonomous_agents.autogpt.prompt_generator import ( ) from langchain_experimental.pydantic_v1 import BaseModel, ValidationError - # PROMPT FINISH_NAME = "finish" @@ -111,8 +110,7 @@ class AutoGPTPrompt(BaseChatPromptTemplate, BaseModel): # type: ignore[misc] [self.token_counter(doc) for doc in relevant_memory] ) content_format = ( - f"This reminds you of these events " - f"from your past:\n{relevant_memory}\n\n" + f"This reminds you of these events from your past:\n{relevant_memory}\n\n" ) memory_message = 
SystemMessage(content=content_format) used_tokens += self.token_counter(memory_message.content) @@ -233,14 +231,14 @@ class PromptGenerator: formatted_response_format = json.dumps(self.response_format, indent=4) prompt_string = ( f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n" - f"Commands:\n" + "Commands:\n" f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n" f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n" - f"Performance Evaluation:\n" + "Performance Evaluation:\n" f"{self._generate_numbered_list(self.performance_evaluation)}\n\n" - f"You should only respond in JSON format as described below " + "You should only respond in JSON format as described below " f"\nResponse Format: \n{formatted_response_format} " - f"\nEnsure the response can be parsed by Python json.loads" + "\nEnsure the response can be parsed by Python json.loads" ) return prompt_string @@ -419,13 +417,11 @@ class AutoGPT: else: result = ( f"Unknown command '{action.name}'. " - f"Please refer to the 'COMMANDS' list for available " - f"commands and only respond in the specified JSON format." + "Please refer to the 'COMMANDS' list for available " + "commands and only respond in the specified JSON format." ) - memory_to_add = ( - f"Assistant Reply: {assistant_reply} " f"\nResult: {result} " - ) + memory_to_add = f"Assistant Reply: {assistant_reply} \nResult: {result} " if self.feedback_tool is not None: feedback = f"\n{self.feedback_tool.run('Input: ')}" if feedback in {"q", "stop"}: diff --git a/swarms/agents/aot.py b/swarms/agents/aot.py index 22af950e..b36fb43c 100644 --- a/swarms/agents/aot.py +++ b/swarms/agents/aot.py @@ -75,7 +75,8 @@ class OpenAI: except openai_model.error.RateLimitError as e: sleep_duratoin = os.environ.get("OPENAI_RATE_TIMEOUT", 30) print( - f"{str(e)}, sleep for {sleep_duratoin}s, set it by env OPENAI_RATE_TIMEOUT" + f"{str(e)}, sleep for {sleep_duratoin}s, set it by env" + " OPENAI_RATE_TIMEOUT" ) time.sleep(sleep_duratoin) diff --git a/swarms/agents/browser_agent.py b/swarms/agents/browser_agent.py index 2cede22a..02c4ef0d 100644 --- a/swarms/agents/browser_agent.py +++ b/swarms/agents/browser_agent.py @@ -53,10 +53,12 @@ def record(agent_name: str, autotab_ext_path: Optional[str] = None): file.write(data) print( - "\033[34mYou have the Python debugger open, you can run commands in it like you would in a normal Python shell.\033[0m" + "\033[34mYou have the Python debugger open, you can run commands in it like you" + " would in a normal Python shell.\033[0m" ) print( - "\033[34mTo exit, type 'q' and press enter. For a list of commands type '?' and press enter.\033[0m" + "\033[34mTo exit, type 'q' and press enter. For a list of commands type '?' 
and" + " press enter.\033[0m" ) breakpoint() @@ -116,7 +118,8 @@ def open_plugin_and_login(driver: AutotabChromeDriver): raise Exception("Invalid API key") else: raise Exception( - f"Error {response.status_code} from backend while logging you in with your API key: {response.text}" + f"Error {response.status_code} from backend while logging you in" + f" with your API key: {response.text}" ) cookie["name"] = cookie["key"] del cookie["key"] @@ -144,7 +147,8 @@ def get_driver( options = webdriver.ChromeOptions() options.add_argument("--no-sandbox") # Necessary for running options.add_argument( - "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36" + "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + " (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36" ) options.add_argument("--enable-webgl") options.add_argument("--enable-3d-apis") @@ -371,7 +375,10 @@ def _login_with_google(driver, url: str, google_credentials: SiteCredentials): ) main_window = driver.current_window_handle - xpath = "//*[contains(text(), 'Continue with Google') or contains(text(), 'Sign in with Google') or contains(@title, 'Sign in with Google')]" + xpath = ( + "//*[contains(text(), 'Continue with Google') or contains(text(), 'Sign in with" + " Google') or contains(@title, 'Sign in with Google')]" + ) WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, xpath))) driver.find_element( @@ -477,8 +484,6 @@ def play(agent_name: Optional[str] = None): if __name__ == "__main__": play() - - """ @@ -496,17 +501,17 @@ google_credentials: # Optional, specify alternative accounts to use with Google login on a per-service basis - email: you@gmail.com # Credentials without a name use email as key password: ... - + credentials: - notion.so: + notion.so: alts: - notion.com login_with_google_account: default - + figma.com: email: ... password: ... - + airtable.com: login_with_google_account: you@gmail.com """ diff --git a/swarms/agents/companion.py b/swarms/agents/companion.py new file mode 100644 index 00000000..a630895e --- /dev/null +++ b/swarms/agents/companion.py @@ -0,0 +1,4 @@ +""" +Companion agents converse with the user about the agent the user wants to create then creates the agent with the desired attributes and traits and tools and configurations + +""" diff --git a/swarms/agents/hf_agents.py b/swarms/agents/hf_agents.py index 28c18c71..4e186e3a 100644 --- a/swarms/agents/hf_agents.py +++ b/swarms/agents/hf_agents.py @@ -19,7 +19,6 @@ from transformers.utils import is_offline_mode, is_openai_available, logging # utils logger = logging.get_logger(__name__) - if is_openai_available(): import openai @@ -28,7 +27,6 @@ else: _tools_are_initialized = False - BASE_PYTHON_TOOLS = { "print": print, "range": range, @@ -48,7 +46,6 @@ class PreTool: HUGGINGFACE_DEFAULT_TOOLS = {} - HUGGINGFACE_DEFAULT_TOOLS_FROM_HUB = [ "image-transformation", "text-download", @@ -229,12 +226,14 @@ class Agent: if len(replacements) > 1: names = "\n".join([f"- {n}: {t}" for n, t in replacements.items()]) logger.warning( - f"The following tools have been replaced by the ones provided in `additional_tools`:\n{names}." + "The following tools have been replaced by the ones provided in" + f" `additional_tools`:\n{names}." ) elif len(replacements) == 1: name = list(replacements.keys())[0] logger.warning( - f"{name} has been replaced by {replacements[name]} as provided in `additional_tools`." 
+ f"{name} has been replaced by {replacements[name]} as provided in" + " `additional_tools`." ) self.prepare_for_new_chat() @@ -425,9 +424,9 @@ class HFAgent(Agent): api_key = os.environ.get("OPENAI_API_KEY", None) if api_key is None: raise ValueError( - "You need an openai key to use `OpenAIAgent`. You can get one here: Get one here " - "https://openai.com/api/`. If you have one, set it in your env with `os.environ['OPENAI_API_KEY'] = " - "xxx." + "You need an openai key to use `OpenAIAgent`. You can get one here: Get" + " one here https://openai.com/api/`. If you have one, set it in your" + " env with `os.environ['OPENAI_API_KEY'] = xxx." ) else: openai.api_key = api_key @@ -540,8 +539,9 @@ class AzureOpenAI(Agent): api_key = os.environ.get("AZURE_OPENAI_API_KEY", None) if api_key is None: raise ValueError( - "You need an Azure openAI key to use `AzureOpenAIAgent`. If you have one, set it in your env with " - "`os.environ['AZURE_OPENAI_API_KEY'] = xxx." + "You need an Azure openAI key to use `AzureOpenAIAgent`. If you have" + " one, set it in your env with `os.environ['AZURE_OPENAI_API_KEY'] =" + " xxx." ) else: openai.api_key = api_key @@ -549,8 +549,9 @@ class AzureOpenAI(Agent): resource_name = os.environ.get("AZURE_OPENAI_RESOURCE_NAME", None) if resource_name is None: raise ValueError( - "You need a resource_name to use `AzureOpenAIAgent`. If you have one, set it in your env with " - "`os.environ['AZURE_OPENAI_RESOURCE_NAME'] = xxx." + "You need a resource_name to use `AzureOpenAIAgent`. If you have one," + " set it in your env with `os.environ['AZURE_OPENAI_RESOURCE_NAME'] =" + " xxx." ) else: openai.api_base = f"https://{resource_name}.openai.azure.com" diff --git a/swarms/agents/idea_to_image_agent.py b/swarms/agents/idea_to_image_agent.py index e2a06691..ce3654e0 100644 --- a/swarms/agents/idea_to_image_agent.py +++ b/swarms/agents/idea_to_image_agent.py @@ -1,7 +1,7 @@ import os import logging from dataclasses import dataclass -from dalle3 import Dalle +from swarms.models.dalle3 import Dalle from swarms.models import OpenAIChat diff --git a/swarms/agents/multi_modal_visual_agent.py b/swarms/agents/multi_modal_visual_agent.py index 68941ef0..34780594 100644 --- a/swarms/agents/multi_modal_visual_agent.py +++ b/swarms/agents/multi_modal_visual_agent.py @@ -270,10 +270,12 @@ class InstructPix2Pix: @prompts( name="Instruct Image Using Text", - description="useful when you want to the style of the image to be like the text. " - "like: make it look like a painting. or make it like a robot. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the text. ", + description=( + "useful when you want to the style of the image to be like the text. " + "like: make it look like a painting. or make it like a robot. " + "The input to this tool should be a comma separated string of two, " + "representing the image_path and the text. 
" + ), ) def inference(self, inputs): """Change style of image.""" @@ -286,8 +288,8 @@ class InstructPix2Pix: updated_image_path = get_new_image_name(image_path, func_name="pix2pix") image.save(updated_image_path) print( - f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text: {text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text:" + f" {text}, Output Image: {updated_image_path}" ) return updated_image_path @@ -309,9 +311,12 @@ class Text2Image: @prompts( name="Generate Image From User Input Text", - description="useful when you want to generate an image from a user input text and save it to a file. " - "like: generate an image of an object or something, or generate an image that includes some objects. " - "The input to this tool should be a string, representing the text used to generate image. ", + description=( + "useful when you want to generate an image from a user input text and save" + " it to a file. like: generate an image of an object or something, or" + " generate an image that includes some objects. The input to this tool" + " should be a string, representing the text used to generate image. " + ), ) def inference(self, text): image_filename = os.path.join("image", f"{str(uuid.uuid4())[:8]}.png") @@ -319,7 +324,8 @@ class Text2Image: image = self.pipe(prompt, negative_prompt=self.n_prompt).images[0] image.save(image_filename) print( - f"\nProcessed Text2Image, Input Text: {text}, Output Image: {image_filename}" + f"\nProcessed Text2Image, Input Text: {text}, Output Image:" + f" {image_filename}" ) return image_filename @@ -338,8 +344,11 @@ class ImageCaptioning: @prompts( name="Get Photo Description", - description="useful when you want to know what is inside the photo. receives image_path as input. " - "The input to this tool should be a string, representing the image_path. ", + description=( + "useful when you want to know what is inside the photo. receives image_path" + " as input. The input to this tool should be a string, representing the" + " image_path. " + ), ) def inference(self, image_path): inputs = self.processor(Image.open(image_path), return_tensors="pt").to( @@ -348,7 +357,8 @@ class ImageCaptioning: out = self.model.generate(**inputs) captions = self.processor.decode(out[0], skip_special_tokens=True) print( - f"\nProcessed ImageCaptioning, Input Image: {image_path}, Output Text: {captions}" + f"\nProcessed ImageCaptioning, Input Image: {image_path}, Output Text:" + f" {captions}" ) return captions @@ -361,10 +371,12 @@ class Image2Canny: @prompts( name="Edge Detection On Image", - description="useful when you want to detect the edge of the image. " - "like: detect the edges of this image, or canny detection on image, " - "or perform edge detection on this image, or detect the canny image of this image. " - "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to detect the edge of the image. like: detect the" + " edges of this image, or canny detection on image, or perform edge" + " detection on this image, or detect the canny image of this image. 
The" + " input to this tool should be a string, representing the image_path" + ), ) def inference(self, inputs): image = Image.open(inputs) @@ -376,7 +388,8 @@ class Image2Canny: updated_image_path = get_new_image_name(inputs, func_name="edge") canny.save(updated_image_path) print( - f"\nProcessed Image2Canny, Input Image: {inputs}, Output Text: {updated_image_path}" + f"\nProcessed Image2Canny, Input Image: {inputs}, Output Text:" + f" {updated_image_path}" ) return updated_image_path @@ -410,11 +423,14 @@ class CannyText2Image: @prompts( name="Generate Image Condition On Canny Image", - description="useful when you want to generate a new real image from both the user description and a canny image." - " like: generate a real image of a object or something from this canny image," - " or generate a new real image of a object or something from this edge image. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description. ", + description=( + "useful when you want to generate a new real image from both the user" + " description and a canny image. like: generate a real image of a object or" + " something from this canny image, or generate a new real image of a object" + " or something from this edge image. The input to this tool should be a" + " comma separated string of two, representing the image_path and the user" + " description. " + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -435,8 +451,8 @@ class CannyText2Image: updated_image_path = get_new_image_name(image_path, func_name="canny2image") image.save(updated_image_path) print( - f"\nProcessed CannyText2Image, Input Canny: {image_path}, Input Text: {instruct_text}, " - f"Output Text: {updated_image_path}" + f"\nProcessed CannyText2Image, Input Canny: {image_path}, Input Text:" + f" {instruct_text}, Output Text: {updated_image_path}" ) return updated_image_path @@ -448,10 +464,13 @@ class Image2Line: @prompts( name="Line Detection On Image", - description="useful when you want to detect the straight line of the image. " - "like: detect the straight lines of this image, or straight line detection on image, " - "or perform straight line detection on this image, or detect the straight line image of this image. " - "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to detect the straight line of the image. like:" + " detect the straight lines of this image, or straight line detection on" + " image, or perform straight line detection on this image, or detect the" + " straight line image of this image. The input to this tool should be a" + " string, representing the image_path" + ), ) def inference(self, inputs): image = Image.open(inputs) @@ -459,7 +478,8 @@ class Image2Line: updated_image_path = get_new_image_name(inputs, func_name="line-of") mlsd.save(updated_image_path) print( - f"\nProcessed Image2Line, Input Image: {inputs}, Output Line: {updated_image_path}" + f"\nProcessed Image2Line, Input Image: {inputs}, Output Line:" + f" {updated_image_path}" ) return updated_image_path @@ -492,12 +512,14 @@ class LineText2Image: @prompts( name="Generate Image Condition On Line Image", - description="useful when you want to generate a new real image from both the user description " - "and a straight line image. 
" - "like: generate a real image of a object or something from this straight line image, " - "or generate a new real image of a object or something from this straight lines. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description. ", + description=( + "useful when you want to generate a new real image from both the user" + " description and a straight line image. like: generate a real image of a" + " object or something from this straight line image, or generate a new real" + " image of a object or something from this straight lines. The input to" + " this tool should be a comma separated string of two, representing the" + " image_path and the user description. " + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -518,8 +540,8 @@ class LineText2Image: updated_image_path = get_new_image_name(image_path, func_name="line2image") image.save(updated_image_path) print( - f"\nProcessed LineText2Image, Input Line: {image_path}, Input Text: {instruct_text}, " - f"Output Text: {updated_image_path}" + f"\nProcessed LineText2Image, Input Line: {image_path}, Input Text:" + f" {instruct_text}, Output Text: {updated_image_path}" ) return updated_image_path @@ -531,10 +553,13 @@ class Image2Hed: @prompts( name="Hed Detection On Image", - description="useful when you want to detect the soft hed boundary of the image. " - "like: detect the soft hed boundary of this image, or hed boundary detection on image, " - "or perform hed boundary detection on this image, or detect soft hed boundary image of this image. " - "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to detect the soft hed boundary of the image. like:" + " detect the soft hed boundary of this image, or hed boundary detection on" + " image, or perform hed boundary detection on this image, or detect soft" + " hed boundary image of this image. The input to this tool should be a" + " string, representing the image_path" + ), ) def inference(self, inputs): image = Image.open(inputs) @@ -542,7 +567,8 @@ class Image2Hed: updated_image_path = get_new_image_name(inputs, func_name="hed-boundary") hed.save(updated_image_path) print( - f"\nProcessed Image2Hed, Input Image: {inputs}, Output Hed: {updated_image_path}" + f"\nProcessed Image2Hed, Input Image: {inputs}, Output Hed:" + f" {updated_image_path}" ) return updated_image_path @@ -575,12 +601,14 @@ class HedText2Image: @prompts( name="Generate Image Condition On Soft Hed Boundary Image", - description="useful when you want to generate a new real image from both the user description " - "and a soft hed boundary image. " - "like: generate a real image of a object or something from this soft hed boundary image, " - "or generate a new real image of a object or something from this hed boundary. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description", + description=( + "useful when you want to generate a new real image from both the user" + " description and a soft hed boundary image. like: generate a real image of" + " a object or something from this soft hed boundary image, or generate a" + " new real image of a object or something from this hed boundary. 
The input" + " to this tool should be a comma separated string of two, representing the" + " image_path and the user description" + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -601,8 +629,8 @@ class HedText2Image: updated_image_path = get_new_image_name(image_path, func_name="hed2image") image.save(updated_image_path) print( - f"\nProcessed HedText2Image, Input Hed: {image_path}, Input Text: {instruct_text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed HedText2Image, Input Hed: {image_path}, Input Text:" + f" {instruct_text}, Output Image: {updated_image_path}" ) return updated_image_path @@ -614,10 +642,12 @@ class Image2Scribble: @prompts( name="Sketch Detection On Image", - description="useful when you want to generate a scribble of the image. " - "like: generate a scribble of this image, or generate a sketch from this image, " - "detect the sketch from this image. " - "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to generate a scribble of the image. like: generate a" + " scribble of this image, or generate a sketch from this image, detect the" + " sketch from this image. The input to this tool should be a string," + " representing the image_path" + ), ) def inference(self, inputs): image = Image.open(inputs) @@ -625,7 +655,8 @@ class Image2Scribble: updated_image_path = get_new_image_name(inputs, func_name="scribble") scribble.save(updated_image_path) print( - f"\nProcessed Image2Scribble, Input Image: {inputs}, Output Scribble: {updated_image_path}" + f"\nProcessed Image2Scribble, Input Image: {inputs}, Output Scribble:" + f" {updated_image_path}" ) return updated_image_path @@ -659,10 +690,12 @@ class ScribbleText2Image: @prompts( name="Generate Image Condition On Sketch Image", - description="useful when you want to generate a new real image from both the user description and " - "a scribble image or a sketch image. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description", + description=( + "useful when you want to generate a new real image from both the user" + " description and a scribble image or a sketch image. The input to this" + " tool should be a comma separated string of two, representing the" + " image_path and the user description" + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -683,8 +716,8 @@ class ScribbleText2Image: updated_image_path = get_new_image_name(image_path, func_name="scribble2image") image.save(updated_image_path) print( - f"\nProcessed ScribbleText2Image, Input Scribble: {image_path}, Input Text: {instruct_text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed ScribbleText2Image, Input Scribble: {image_path}, Input Text:" + f" {instruct_text}, Output Image: {updated_image_path}" ) return updated_image_path @@ -696,9 +729,11 @@ class Image2Pose: @prompts( name="Pose Detection On Image", - description="useful when you want to detect the human pose of the image. " - "like: generate human poses of this image, or generate a pose image from this image. " - "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to detect the human pose of the image. like: generate" + " human poses of this image, or generate a pose image from this image. 
The" + " input to this tool should be a string, representing the image_path" + ), ) def inference(self, inputs): image = Image.open(inputs) @@ -706,7 +741,8 @@ class Image2Pose: updated_image_path = get_new_image_name(inputs, func_name="human-pose") pose.save(updated_image_path) print( - f"\nProcessed Image2Pose, Input Image: {inputs}, Output Pose: {updated_image_path}" + f"\nProcessed Image2Pose, Input Image: {inputs}, Output Pose:" + f" {updated_image_path}" ) return updated_image_path @@ -742,12 +778,13 @@ class PoseText2Image: @prompts( name="Generate Image Condition On Pose Image", - description="useful when you want to generate a new real image from both the user description " - "and a human pose image. " - "like: generate a real image of a human from this human pose image, " - "or generate a new real image of a human from this pose. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description", + description=( + "useful when you want to generate a new real image from both the user" + " description and a human pose image. like: generate a real image of a" + " human from this human pose image, or generate a new real image of a human" + " from this pose. The input to this tool should be a comma separated string" + " of two, representing the image_path and the user description" + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -768,8 +805,8 @@ class PoseText2Image: updated_image_path = get_new_image_name(image_path, func_name="pose2image") image.save(updated_image_path) print( - f"\nProcessed PoseText2Image, Input Pose: {image_path}, Input Text: {instruct_text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed PoseText2Image, Input Pose: {image_path}, Input Text:" + f" {instruct_text}, Output Image: {updated_image_path}" ) return updated_image_path @@ -802,11 +839,14 @@ class SegText2Image: @prompts( name="Generate Image Condition On Segmentations", - description="useful when you want to generate a new real image from both the user description and segmentations. " - "like: generate a real image of a object or something from this segmentation image, " - "or generate a new real image of a object or something from these segmentations. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description", + description=( + "useful when you want to generate a new real image from both the user" + " description and segmentations. like: generate a real image of a object or" + " something from this segmentation image, or generate a new real image of a" + " object or something from these segmentations. The input to this tool" + " should be a comma separated string of two, representing the image_path" + " and the user description" + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -827,8 +867,8 @@ class SegText2Image: updated_image_path = get_new_image_name(image_path, func_name="segment2image") image.save(updated_image_path) print( - f"\nProcessed SegText2Image, Input Seg: {image_path}, Input Text: {instruct_text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed SegText2Image, Input Seg: {image_path}, Input Text:" + f" {instruct_text}, Output Image: {updated_image_path}" ) return updated_image_path @@ -840,9 +880,12 @@ class Image2Depth: @prompts( name="Predict Depth On Image", - description="useful when you want to detect depth of the image. 
like: generate the depth from this image, " - "or detect the depth map on this image, or predict the depth for this image. " - "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to detect depth of the image. like: generate the" + " depth from this image, or detect the depth map on this image, or predict" + " the depth for this image. The input to this tool should be a string," + " representing the image_path" + ), ) def inference(self, inputs): image = Image.open(inputs) @@ -854,7 +897,8 @@ class Image2Depth: updated_image_path = get_new_image_name(inputs, func_name="depth") depth.save(updated_image_path) print( - f"\nProcessed Image2Depth, Input Image: {inputs}, Output Depth: {updated_image_path}" + f"\nProcessed Image2Depth, Input Image: {inputs}, Output Depth:" + f" {updated_image_path}" ) return updated_image_path @@ -888,11 +932,14 @@ class DepthText2Image: @prompts( name="Generate Image Condition On Depth", - description="useful when you want to generate a new real image from both the user description and depth image. " - "like: generate a real image of a object or something from this depth image, " - "or generate a new real image of a object or something from the depth map. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description", + description=( + "useful when you want to generate a new real image from both the user" + " description and depth image. like: generate a real image of a object or" + " something from this depth image, or generate a new real image of a object" + " or something from the depth map. The input to this tool should be a comma" + " separated string of two, representing the image_path and the user" + " description" + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -913,8 +960,8 @@ class DepthText2Image: updated_image_path = get_new_image_name(image_path, func_name="depth2image") image.save(updated_image_path) print( - f"\nProcessed DepthText2Image, Input Depth: {image_path}, Input Text: {instruct_text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed DepthText2Image, Input Depth: {image_path}, Input Text:" + f" {instruct_text}, Output Image: {updated_image_path}" ) return updated_image_path @@ -929,9 +976,11 @@ class Image2Normal: @prompts( name="Predict Normal Map On Image", - description="useful when you want to detect norm map of the image. " - "like: generate normal map from this image, or predict normal map of this image. " - "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to detect norm map of the image. like: generate" + " normal map from this image, or predict normal map of this image. The" + " input to this tool should be a string, representing the image_path" + ), ) def inference(self, inputs): image = Image.open(inputs) @@ -954,7 +1003,8 @@ class Image2Normal: updated_image_path = get_new_image_name(inputs, func_name="normal-map") image.save(updated_image_path) print( - f"\nProcessed Image2Normal, Input Image: {inputs}, Output Depth: {updated_image_path}" + f"\nProcessed Image2Normal, Input Image: {inputs}, Output Depth:" + f" {updated_image_path}" ) return updated_image_path @@ -988,11 +1038,14 @@ class NormalText2Image: @prompts( name="Generate Image Condition On Normal Map", - description="useful when you want to generate a new real image from both the user description and normal map. 
" - "like: generate a real image of a object or something from this normal map, " - "or generate a new real image of a object or something from the normal map. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the user description", + description=( + "useful when you want to generate a new real image from both the user" + " description and normal map. like: generate a real image of a object or" + " something from this normal map, or generate a new real image of a object" + " or something from the normal map. The input to this tool should be a" + " comma separated string of two, representing the image_path and the user" + " description" + ), ) def inference(self, inputs): image_path, instruct_text = inputs.split(",")[0], ",".join( @@ -1013,8 +1066,8 @@ class NormalText2Image: updated_image_path = get_new_image_name(image_path, func_name="normal2image") image.save(updated_image_path) print( - f"\nProcessed NormalText2Image, Input Normal: {image_path}, Input Text: {instruct_text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed NormalText2Image, Input Normal: {image_path}, Input Text:" + f" {instruct_text}, Output Image: {updated_image_path}" ) return updated_image_path @@ -1031,9 +1084,12 @@ class VisualQuestionAnswering: @prompts( name="Answer Question About The Image", - description="useful when you need an answer for a question based on an image. " - "like: what is the background color of the last image, how many cats in this figure, what is in this figure. " - "The input to this tool should be a comma separated string of two, representing the image_path and the question", + description=( + "useful when you need an answer for a question based on an image. like:" + " what is the background color of the last image, how many cats in this" + " figure, what is in this figure. The input to this tool should be a comma" + " separated string of two, representing the image_path and the question" + ), ) def inference(self, inputs): image_path, question = inputs.split(",")[0], ",".join(inputs.split(",")[1:]) @@ -1044,8 +1100,8 @@ class VisualQuestionAnswering: out = self.model.generate(**inputs) answer = self.processor.decode(out[0], skip_special_tokens=True) print( - f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, " - f"Output Answer: {answer}" + f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input" + f" Question: {question}, Output Answer: {answer}" ) return answer @@ -1245,12 +1301,13 @@ class Segmenting: @prompts( name="Segment the Image", - description="useful when you want to segment all the part of the image, but not segment a certain object." - "like: segment all the object in this image, or generate segmentations on this image, " - "or segment the image," - "or perform segmentation on this image, " - "or segment all the object in this image." 
- "The input to this tool should be a string, representing the image_path", + description=( + "useful when you want to segment all the part of the image, but not segment" + " a certain object.like: segment all the object in this image, or generate" + " segmentations on this image, or segment the image,or perform segmentation" + " on this image, or segment all the object in this image.The input to this" + " tool should be a string, representing the image_path" + ), ) def inference_all(self, image_path): image = cv2.imread(image_path) @@ -1401,9 +1458,12 @@ class Text2Box: @prompts( name="Detect the Give Object", - description="useful when you only want to detect or find out given objects in the picture" - "The input to this tool should be a comma separated string of two, " - "representing the image_path, the text description of the object to be found", + description=( + "useful when you only want to detect or find out given objects in the" + " pictureThe input to this tool should be a comma separated string of two," + " representing the image_path, the text description of the object to be" + " found" + ), ) def inference(self, inputs): image_path, det_prompt = inputs.split(",") @@ -1427,8 +1487,8 @@ class Text2Box: updated_image = image_with_box.resize(size) updated_image.save(updated_image_path) print( - f"\nProcessed ObejectDetecting, Input Image: {image_path}, Object to be Detect {det_prompt}, " - f"Output Image: {updated_image_path}" + f"\nProcessed ObejectDetecting, Input Image: {image_path}, Object to be" + f" Detect {det_prompt}, Output Image: {updated_image_path}" ) return updated_image_path @@ -1483,7 +1543,8 @@ class InfinityOutPainting: out = self.ImageVQA.model.generate(**inputs) answer = self.ImageVQA.processor.decode(out[0], skip_special_tokens=True) print( - f"\nProcessed VisualQuestionAnswering, Input Question: {question}, Output Answer: {answer}" + f"\nProcessed VisualQuestionAnswering, Input Question: {question}, Output" + f" Answer: {answer}" ) return answer @@ -1499,9 +1560,9 @@ class InfinityOutPainting: def check_prompt(self, prompt): check = ( - f"Here is a paragraph with adjectives. " + "Here is a paragraph with adjectives. " f"{prompt} " - f"Please change all plural forms in the adjectives to singular forms. " + "Please change all plural forms in the adjectives to singular forms. 
" ) return self.llm(check) @@ -1512,13 +1573,12 @@ class InfinityOutPainting: ) style = self.get_BLIP_vqa(image, "what is the style of this image") imagine_prompt = ( - f"let's pretend you are an excellent painter and now " - f"there is an incomplete painting with {BLIP_caption} in the center, " - f"please imagine the complete painting and describe it" - f"you should consider the background color is {background_color}, the style is {style}" - f"You should make the painting as vivid and realistic as possible" - f"You can not use words like painting or picture" - f"and you should use no more than 50 words to describe it" + "let's pretend you are an excellent painter and now there is an incomplete" + f" painting with {BLIP_caption} in the center, please imagine the complete" + " painting and describe ityou should consider the background color is" + f" {background_color}, the style is {style}You should make the painting as" + " vivid and realistic as possibleYou can not use words like painting or" + " pictureand you should use no more than 50 words to describe it" ) caption = self.llm(imagine_prompt) if imagine else BLIP_caption caption = self.check_prompt(caption) @@ -1580,9 +1640,12 @@ class InfinityOutPainting: @prompts( name="Extend An Image", - description="useful when you need to extend an image into a larger image." - "like: extend the image into a resolution of 2048x1024, extend the image into 2048x1024. " - "The input to this tool should be a comma separated string of two, representing the image_path and the resolution of widthxheight", + description=( + "useful when you need to extend an image into a larger image.like: extend" + " the image into a resolution of 2048x1024, extend the image into" + " 2048x1024. The input to this tool should be a comma separated string of" + " two, representing the image_path and the resolution of widthxheight" + ), ) def inference(self, inputs): image_path, resolution = inputs.split(",") @@ -1594,8 +1657,8 @@ class InfinityOutPainting: updated_image_path = get_new_image_name(image_path, func_name="outpainting") out_painted_image.save(updated_image_path) print( - f"\nProcessed InfinityOutPainting, Input Image: {image_path}, Input Resolution: {resolution}, " - f"Output Image: {updated_image_path}" + f"\nProcessed InfinityOutPainting, Input Image: {image_path}, Input" + f" Resolution: {resolution}, Output Image: {updated_image_path}" ) return updated_image_path @@ -1610,12 +1673,13 @@ class ObjectSegmenting: @prompts( name="Segment the given object", - description="useful when you only want to segment the certain objects in the picture" - "according to the given text" - "like: segment the cat," - "or can you segment an obeject for me" - "The input to this tool should be a comma separated string of two, " - "representing the image_path, the text description of the object to be found", + description=( + "useful when you only want to segment the certain objects in the" + " pictureaccording to the given textlike: segment the cat,or can you" + " segment an obeject for meThe input to this tool should be a comma" + " separated string of two, representing the image_path, the text" + " description of the object to be found" + ), ) def inference(self, inputs): image_path, det_prompt = inputs.split(",") @@ -1627,8 +1691,8 @@ class ObjectSegmenting: image_pil, image_path, boxes_filt, pred_phrases ) print( - f"\nProcessed ObejectSegmenting, Input Image: {image_path}, Object to be Segment {det_prompt}, " - f"Output Image: {updated_image_path}" + f"\nProcessed 
ObejectSegmenting, Input Image: {image_path}, Object to be" + f" Segment {det_prompt}, Output Image: {updated_image_path}" ) return updated_image_path @@ -1710,10 +1774,12 @@ class ImageEditing: @prompts( name="Remove Something From The Photo", - description="useful when you want to remove and object or something from the photo " - "from its description or location. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the object need to be removed. ", + description=( + "useful when you want to remove and object or something from the photo " + "from its description or location. " + "The input to this tool should be a comma separated string of two, " + "representing the image_path and the object need to be removed. " + ), ) def inference_remove(self, inputs): image_path, to_be_removed_txt = inputs.split(",")[0], ",".join( @@ -1725,10 +1791,12 @@ class ImageEditing: @prompts( name="Replace Something From The Photo", - description="useful when you want to replace an object from the object description or " - "location with another object from its description. " - "The input to this tool should be a comma separated string of three, " - "representing the image_path, the object to be replaced, the object to be replaced with ", + description=( + "useful when you want to replace an object from the object description or" + " location with another object from its description. The input to this tool" + " should be a comma separated string of three, representing the image_path," + " the object to be replaced, the object to be replaced with " + ), ) def inference_replace_sam(self, inputs): image_path, to_be_replaced_txt, replace_with_txt = inputs.split(",") @@ -1758,8 +1826,9 @@ class ImageEditing: updated_image = updated_image.resize(image_pil.size) updated_image.save(updated_image_path) print( - f"\nProcessed ImageEditing, Input Image: {image_path}, Replace {to_be_replaced_txt} to {replace_with_txt}, " - f"Output Image: {updated_image_path}" + f"\nProcessed ImageEditing, Input Image: {image_path}, Replace" + f" {to_be_replaced_txt} to {replace_with_txt}, Output Image:" + f" {updated_image_path}" ) return updated_image_path @@ -1782,8 +1851,10 @@ class BackgroundRemoving: @prompts( name="Remove the background", - description="useful when you want to extract the object or remove the background," - "the input should be a string image_path", + description=( + "useful when you want to extract the object or remove the background," + "the input should be a string image_path" + ), ) def inference(self, image_path): """ @@ -1833,7 +1904,8 @@ class MultiModalVisualAgent: if "ImageCaptioning" not in load_dict: raise ValueError( - "You have to load ImageCaptioning as a basic function for MultiModalVisualAgent" + "You have to load ImageCaptioning as a basic function for" + " MultiModalVisualAgent" ) self.models = {} @@ -1944,10 +2016,21 @@ class MultiModalVisualAgent: description = self.models["ImageCaptioning"].inference(image_filename) if lang == "Chinese": - Human_prompt = f'\nHuman: 提供一张名为 {image_filename}的图片。它的描述是: {description}。 这些信息帮助你理解这个图像,但是你应该使用工具来完成下面的任务,而不是直接从我的描述中想象。 如果你明白了, 说 "收到". \n' + Human_prompt = ( + f"\nHuman: 提供一张名为 {image_filename}的图片。它的描述是:" + f" {description}。 这些信息帮助你理解这个图像," + "但是你应该使用工具来完成下面的任务,而不是直接从我的描述中想象。" + ' 如果你明白了, 说 "收到". \n' + ) AI_prompt = "收到。 " else: - Human_prompt = f'\nHuman: provide a figure named {image_filename}. The description is: {description}. 
This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say "Received". \n' + Human_prompt = ( + f"\nHuman: provide a figure named {image_filename}. The description is:" + f" {description}. This information helps you to understand this image," + " but you should use tools to finish following tasks, rather than" + " directly imagine from my description. If you understand, say" + ' "Received". \n' + ) AI_prompt = "Received. " self.agent.memory.buffer = ( diff --git a/swarms/agents/profitpilot.py b/swarms/agents/profitpilot.py index 8f6927c4..6858dc72 100644 --- a/swarms/agents/profitpilot.py +++ b/swarms/agents/profitpilot.py @@ -16,7 +16,6 @@ from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores import Chroma from pydantic import BaseModel, Field from swarms.prompts.sales import SALES_AGENT_TOOLS_PROMPT, conversation_stages -from swarms.tools.interpreter_tool import compile # classes @@ -164,14 +163,10 @@ def get_tools(product_catalog): Tool( name="ProductSearch", func=knowledge_base.run, - description="useful for when you need to answer questions about product information", + description=( + "useful for when you need to answer questions about product information" + ), ), - # Interpreter - Tool( - name="Code Interepeter", - func=compile, - description="Useful when you need to run code locally, such as Python, Javascript, Shell, and more.", - ) # omnimodal agent ] @@ -231,7 +226,10 @@ class SalesConvoOutputParser(AgentOutputParser): # TODO - this is not entirely reliable, sometimes results in an error. return AgentFinish( { - "output": "I apologize, I was unable to find the answer to your question. Is there anything else I can help with?" + "output": ( + "I apologize, I was unable to find the answer to your question." + " Is there anything else I can help with?" + ) }, text, ) @@ -257,21 +255,62 @@ class ProfitPilot(Chain, BaseModel): use_tools: bool = False conversation_stage_dict: Dict = { - "1": "Introduction: Start the conversation by introducing yourself and your company. Be polite and respectful while keeping the tone of the conversation professional. Your greeting should be welcoming. Always clarify in your greeting the reason why you are contacting the prospect.", - "2": "Qualification: Qualify the prospect by confirming if they are the right person to talk to regarding your product/service. Ensure that they have the authority to make purchasing decisions.", - "3": "Value proposition: Briefly explain how your product/service can benefit the prospect. Focus on the unique selling points and value proposition of your product/service that sets it apart from competitors.", - "4": "Needs analysis: Ask open-ended questions to uncover the prospect's needs and pain points. Listen carefully to their responses and take notes.", - "5": "Solution presentation: Based on the prospect's needs, present your product/service as the solution that can address their pain points.", - "6": "Objection handling: Address any objections that the prospect may have regarding your product/service. Be prepared to provide evidence or testimonials to support your claims.", - "7": "Close: Ask for the sale by proposing a next step. This could be a demo, a trial or a meeting with decision-makers. Ensure to summarize what has been discussed and reiterate the benefits.", + "1": ( + "Introduction: Start the conversation by introducing yourself and your" + " company. 
Be polite and respectful while keeping the tone of the" + " conversation professional. Your greeting should be welcoming. Always" + " clarify in your greeting the reason why you are contacting the prospect." + ), + "2": ( + "Qualification: Qualify the prospect by confirming if they are the right" + " person to talk to regarding your product/service. Ensure that they have" + " the authority to make purchasing decisions." + ), + "3": ( + "Value proposition: Briefly explain how your product/service can benefit" + " the prospect. Focus on the unique selling points and value proposition of" + " your product/service that sets it apart from competitors." + ), + "4": ( + "Needs analysis: Ask open-ended questions to uncover the prospect's needs" + " and pain points. Listen carefully to their responses and take notes." + ), + "5": ( + "Solution presentation: Based on the prospect's needs, present your" + " product/service as the solution that can address their pain points." + ), + "6": ( + "Objection handling: Address any objections that the prospect may have" + " regarding your product/service. Be prepared to provide evidence or" + " testimonials to support your claims." + ), + "7": ( + "Close: Ask for the sale by proposing a next step. This could be a demo, a" + " trial or a meeting with decision-makers. Ensure to summarize what has" + " been discussed and reiterate the benefits." + ), } salesperson_name: str = "Ted Lasso" salesperson_role: str = "Business Development Representative" company_name: str = "Sleep Haven" - company_business: str = "Sleep Haven is a premium mattress company that provides customers with the most comfortable and supportive sleeping experience possible. We offer a range of high-quality mattresses, pillows, and bedding accessories that are designed to meet the unique needs of our customers." - company_values: str = "Our mission at Sleep Haven is to help people achieve a better night's sleep by providing them with the best possible sleep solutions. We believe that quality sleep is essential to overall health and well-being, and we are committed to helping our customers achieve optimal sleep by offering exceptional products and customer service." - conversation_purpose: str = "find out whether they are looking to achieve better sleep via buying a premier mattress." + company_business: str = ( + "Sleep Haven is a premium mattress company that provides customers with the" + " most comfortable and supportive sleeping experience possible. We offer a" + " range of high-quality mattresses, pillows, and bedding accessories that are" + " designed to meet the unique needs of our customers." + ) + company_values: str = ( + "Our mission at Sleep Haven is to help people achieve a better night's sleep by" + " providing them with the best possible sleep solutions. We believe that" + " quality sleep is essential to overall health and well-being, and we are" + " committed to helping our customers achieve optimal sleep by offering" + " exceptional products and customer service." + ) + conversation_purpose: str = ( + "find out whether they are looking to achieve better sleep via buying a premier" + " mattress." + ) conversation_type: str = "call" def retrieve_conversation_stage(self, key): @@ -419,14 +458,32 @@ config = dict( salesperson_name="Ted Lasso", salesperson_role="Business Development Representative", company_name="Sleep Haven", - company_business="Sleep Haven is a premium mattress company that provides customers with the most comfortable and supportive sleeping experience possible. 
We offer a range of high-quality mattresses, pillows, and bedding accessories that are designed to meet the unique needs of our customers.", - company_values="Our mission at Sleep Haven is to help people achieve a better night's sleep by providing them with the best possible sleep solutions. We believe that quality sleep is essential to overall health and well-being, and we are committed to helping our customers achieve optimal sleep by offering exceptional products and customer service.", - conversation_purpose="find out whether they are looking to achieve better sleep via buying a premier mattress.", + company_business=( + "Sleep Haven is a premium mattress company that provides customers with the" + " most comfortable and supportive sleeping experience possible. We offer a" + " range of high-quality mattresses, pillows, and bedding accessories that are" + " designed to meet the unique needs of our customers." + ), + company_values=( + "Our mission at Sleep Haven is to help people achieve a better night's sleep by" + " providing them with the best possible sleep solutions. We believe that" + " quality sleep is essential to overall health and well-being, and we are" + " committed to helping our customers achieve optimal sleep by offering" + " exceptional products and customer service." + ), + conversation_purpose=( + "find out whether they are looking to achieve better sleep via buying a premier" + " mattress." + ), conversation_history=[], conversation_type="call", conversation_stage=conversation_stages.get( "1", - "Introduction: Start the conversation by introducing yourself and your company. Be polite and respectful while keeping the tone of the conversation professional.", + ( + "Introduction: Start the conversation by introducing yourself and your" + " company. Be polite and respectful while keeping the tone of the" + " conversation professional." + ), ), use_tools=True, product_catalog="sample_product_catalog.txt", diff --git a/swarms/agents/registry.py b/swarms/agents/registry.py index b53b5714..aa1f1375 100644 --- a/swarms/agents/registry.py +++ b/swarms/agents/registry.py @@ -19,7 +19,8 @@ class Registry(BaseModel): def build(self, type: str, **kwargs): if type not in self.entries: raise ValueError( - f'{type} is not registered. Please register with the .register("{type}") method provided in {self.name} registry' + f"{type} is not registered. 
Please register with the" + f' .register("{type}") method provided in {self.name} registry' ) return self.entries[type](**kwargs) diff --git a/swarms/chunkers/__init__.py b/swarms/chunkers/__init__.py index 5e09586b..159e8d5b 100644 --- a/swarms/chunkers/__init__.py +++ b/swarms/chunkers/__init__.py @@ -3,7 +3,6 @@ # from swarms.chunkers.text import TextChunker # from swarms.chunkers.pdf import PdfChunker - # __all__ = [ # "BaseChunker", # "ChunkSeparator", diff --git a/swarms/chunkers/base.py b/swarms/chunkers/base.py index 464f51e4..0fabdcef 100644 --- a/swarms/chunkers/base.py +++ b/swarms/chunkers/base.py @@ -1,10 +1,13 @@ from __future__ import annotations + from abc import ABC from typing import Optional -from attr import define, field, Factory + +from attr import Factory, define, field from griptape.artifacts import TextArtifact -from swarms.chunkers.chunk_seperators import ChunkSeparator -from griptape.tokenizers import OpenAiTokenizer + +from swarms.chunkers.chunk_seperator import ChunkSeparator +from swarms.models.openai_tokenizer import OpenAITokenizer @define @@ -16,6 +19,24 @@ class BaseChunker(ABC): Usage: -------------- + from swarms.chunkers.base import BaseChunker + from swarms.chunkers.chunk_seperator import ChunkSeparator + + class PdfChunker(BaseChunker): + DEFAULT_SEPARATORS = [ + ChunkSeparator("\n\n"), + ChunkSeparator(". "), + ChunkSeparator("! "), + ChunkSeparator("? "), + ChunkSeparator(" "), + ] + + # Example + pdf = "swarmdeck.pdf" + chunker = PdfChunker() + chunks = chunker.chunk(pdf) + print(chunks) + """ @@ -26,10 +47,10 @@ class BaseChunker(ABC): default=Factory(lambda self: self.DEFAULT_SEPARATORS, takes_self=True), kw_only=True, ) - tokenizer: OpenAiTokenizer = field( + tokenizer: OpenAITokenizer = field( default=Factory( - lambda: OpenAiTokenizer( - model=OpenAiTokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL + lambda: OpenAITokenizer( + model=OpenAITokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL ) ), kw_only=True, @@ -47,7 +68,7 @@ class BaseChunker(ABC): def _chunk_recursively( self, chunk: str, current_separator: Optional[ChunkSeparator] = None ) -> list[str]: - token_count = self.tokenizer.token_count(chunk) + token_count = self.tokenizer.count_tokens(chunk) if token_count <= self.max_tokens: return [chunk] diff --git a/swarms/chunkers/markdown.py b/swarms/chunkers/markdown.py index 6c0e755f..7836b0a7 100644 --- a/swarms/chunkers/markdown.py +++ b/swarms/chunkers/markdown.py @@ -15,3 +15,10 @@ class MarkdownChunker(BaseChunker): ChunkSeparator("? 
"), ChunkSeparator(" "), ] + + +# # Example using chunker to chunk a markdown file +# file = open("README.md", "r") +# text = file.read() +# chunker = MarkdownChunker() +# chunks = chunker.chunk(text) diff --git a/swarms/chunkers/omni_chunker.py b/swarms/chunkers/omni_chunker.py new file mode 100644 index 00000000..70a11380 --- /dev/null +++ b/swarms/chunkers/omni_chunker.py @@ -0,0 +1,117 @@ +""" +Omni Chunker is a chunker that chunks all files into select chunks of size x strings + +Usage: +-------------- +from swarms.chunkers.omni_chunker import OmniChunker + +# Example +pdf = "swarmdeck.pdf" +chunker = OmniChunker(chunk_size=1000, beautify=True) +chunks = chunker(pdf) +print(chunks) + + +""" +from dataclasses import dataclass +from typing import List, Optional, Callable +from termcolor import colored +import os +import sys + + +@dataclass +class OmniChunker: + """ """ + + chunk_size: int = 1000 + beautify: bool = False + use_tokenizer: bool = False + tokenizer: Optional[Callable[[str], List[str]]] = None + + def __call__(self, file_path: str) -> List[str]: + """ + Chunk the given file into parts of size `chunk_size`. + + Args: + file_path (str): The path to the file to chunk. + + Returns: + List[str]: A list of string chunks from the file. + """ + if not os.path.isfile(file_path): + print(colored("The file does not exist.", "red")) + return [] + + file_extension = os.path.splitext(file_path)[1] + try: + with open(file_path, "rb") as file: + content = file.read() + # Decode content based on MIME type or file extension + decoded_content = self.decode_content(content, file_extension) + chunks = self.chunk_content(decoded_content) + return chunks + + except Exception as e: + print(colored(f"Error reading file: {e}", "red")) + return [] + + def decode_content(self, content: bytes, file_extension: str) -> str: + """ + Decode the content of the file based on its MIME type or file extension. + + Args: + content (bytes): The content of the file. + file_extension (str): The file extension of the file. + + Returns: + str: The decoded content of the file. + """ + # Add logic to handle different file types based on the extension + # For simplicity, this example assumes text files encoded in utf-8 + try: + return content.decode("utf-8") + except UnicodeDecodeError as e: + print( + colored( + f"Could not decode file with extension {file_extension}: {e}", + "yellow", + ) + ) + return "" + + def chunk_content(self, content: str) -> List[str]: + """ + Split the content into chunks of size `chunk_size`. + + Args: + content (str): The content to chunk. + + Returns: + List[str]: The list of chunks. + """ + return [ + content[i : i + self.chunk_size] + for i in range(0, len(content), self.chunk_size) + ] + + def __str__(self): + return f"OmniChunker(chunk_size={self.chunk_size}, beautify={self.beautify})" + + def metrics(self): + return { + "chunk_size": self.chunk_size, + "beautify": self.beautify, + } + + def print_dashboard(self): + print( + colored( + f""" + Omni Chunker + ------------ + {self.metrics()} + """, + "cyan", + ) + ) diff --git a/swarms/chunkers/pdf.py b/swarms/chunkers/pdf.py index 206c74f3..710134a0 100644 --- a/swarms/chunkers/pdf.py +++ b/swarms/chunkers/pdf.py @@ -10,3 +10,10 @@ class PdfChunker(BaseChunker): ChunkSeparator("? 
"), ChunkSeparator(" "), ] + + +# # Example +# pdf = "swarmdeck.pdf" +# chunker = PdfChunker() +# chunks = chunker.chunk(pdf) +# print(chunks) diff --git a/swarms/loaders/base.py b/swarms/loaders/base.py index a59a93e2..afeeb231 100644 --- a/swarms/loaders/base.py +++ b/swarms/loaders/base.py @@ -15,7 +15,6 @@ if TYPE_CHECKING: from haystack.schema import Document as HaystackDocument from semantic_kernel.memory.memory_record import MemoryRecord - #### DEFAULT_TEXT_NODE_TMPL = "{metadata_str}\n\n{content}" DEFAULT_METADATA_TMPL = "{key}: {value}" @@ -125,7 +124,6 @@ class BaseNode(BaseComponent): embedding: Optional[List[float]] = Field( default=None, description="Embedding of the node." ) - """" metadata fields - injected as part of the text shown to LLMs as context diff --git a/swarms/memory/chroma.py b/swarms/memory/chroma.py index 422d0a67..67ba4cb2 100644 --- a/swarms/memory/chroma.py +++ b/swarms/memory/chroma.py @@ -460,7 +460,7 @@ class Chroma(VectorStore): """ if self._embedding_function is None: raise ValueError( - "For MMR search, you must specify an embedding function on" "creation." + "For MMR search, you must specify an embedding function oncreation." ) embedding = self._embedding_function.embed_query(query) diff --git a/swarms/memory/schemas.py b/swarms/memory/schemas.py index 0405323d..bbc71bc2 100644 --- a/swarms/memory/schemas.py +++ b/swarms/memory/schemas.py @@ -111,7 +111,10 @@ class Step(StepRequestBody): output: Optional[str] = Field( None, description="Output of the task step.", - example="I am going to use the write_to_file command and write Washington to a file called output.txt Callable: + """Validate specified keyword args are mutually exclusive.""" + + def decorator(func: Callable) -> Callable: + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + """Validate exactly one arg in each group is not None.""" + counts = [ + sum(1 for arg in arg_group if kwargs.get(arg) is not None) + for arg_group in arg_groups + ] + invalid_groups = [i for i, count in enumerate(counts) if count != 1] + if invalid_groups: + invalid_group_names = [", ".join(arg_groups[i]) for i in invalid_groups] + raise ValueError( + "Exactly one argument in each of the following" + " groups must be defined:" + f" {', '.join(invalid_group_names)}" + ) + return func(*args, **kwargs) + + return wrapper + + return decorator + + +def raise_for_status_with_text(response: Response) -> None: + """Raise an error with the response text.""" + try: + response.raise_for_status() + except HTTPError as e: + raise ValueError(response.text) from e + + +@contextlib.contextmanager +def mock_now(dt_value): # type: ignore + """Context manager for mocking out datetime.now() in unit tests. + + Example: + with mock_now(datetime.datetime(2011, 2, 3, 10, 11)): + assert datetime.datetime.now() == datetime.datetime(2011, 2, 3, 10, 11) """ - Anthropic large language models. + class MockDateTime(datetime.datetime): + """Mock datetime.datetime.now() with a fixed datetime.""" + @classmethod + def now(cls): # type: ignore + # Create a copy of dt_value. 
+ return datetime.datetime( + dt_value.year, + dt_value.month, + dt_value.day, + dt_value.hour, + dt_value.minute, + dt_value.second, + dt_value.microsecond, + dt_value.tzinfo, + ) + + real_datetime = datetime.datetime + datetime.datetime = MockDateTime + try: + yield datetime.datetime + finally: + datetime.datetime = real_datetime + + +def guard_import( + module_name: str, *, pip_name: Optional[str] = None, package: Optional[str] = None +) -> Any: + """Dynamically imports a module and raises a helpful exception if the module is not + installed.""" + try: + module = importlib.import_module(module_name, package) + except ImportError: + raise ImportError( + f"Could not import {module_name} python package. " + f"Please install it with `pip install {pip_name or module_name}`." + ) + return module + + +def check_package_version( + package: str, + lt_version: Optional[str] = None, + lte_version: Optional[str] = None, + gt_version: Optional[str] = None, + gte_version: Optional[str] = None, +) -> None: + """Check the version of a package.""" + imported_version = parse(version(package)) + if lt_version is not None and imported_version >= parse(lt_version): + raise ValueError( + f"Expected {package} version to be < {lt_version}. Received " + f"{imported_version}." + ) + if lte_version is not None and imported_version > parse(lte_version): + raise ValueError( + f"Expected {package} version to be <= {lte_version}. Received " + f"{imported_version}." + ) + if gt_version is not None and imported_version <= parse(gt_version): + raise ValueError( + f"Expected {package} version to be > {gt_version}. Received " + f"{imported_version}." + ) + if gte_version is not None and imported_version < parse(gte_version): + raise ValueError( + f"Expected {package} version to be >= {gte_version}. Received " + f"{imported_version}." + ) + + +def get_pydantic_field_names(pydantic_cls: Any) -> Set[str]: + """Get field names, including aliases, for a pydantic class. Args: + pydantic_cls: Pydantic class.""" + all_required_field_names = set() + for field in pydantic_cls.__fields__.values(): + all_required_field_names.add(field.name) + if field.has_alias: + all_required_field_names.add(field.alias) + return all_required_field_names +def build_extra_kwargs( + extra_kwargs: Dict[str, Any], + values: Dict[str, Any], + all_required_field_names: Set[str], +) -> Dict[str, Any]: + """Build extra kwargs from values and extra_kwargs. + + Args: + extra_kwargs: Extra kwargs passed in by user. + values: Values passed in by user. + all_required_field_names: All required field names for the pydantic class. """ + for field_name in list(values): + if field_name in extra_kwargs: + raise ValueError(f"Found {field_name} supplied twice.") + if field_name not in all_required_field_names: + warnings.warn( + f"""WARNING! {field_name} is not default parameter. + {field_name} was transferred to model_kwargs. 
+ Please confirm that {field_name} is what you intended.""" + ) + extra_kwargs[field_name] = values.pop(field_name) - def __init__( - self, - model="claude-2", - max_tokens_to_sample=256, - temperature=None, - top_k=None, - top_p=None, - streaming=False, - default_request_timeout=None, - ): - self.model = model - self.max_tokens_to_sample = max_tokens_to_sample - self.temperature = temperature - self.top_k = top_k - self.top_p = top_p - self.streaming = streaming - self.default_request_timeout = default_request_timeout or 600 - self.anthropic_api_url = os.getenv( - "ANTHROPIC_API_URL", "https://api.anthropic.com" + invalid_model_kwargs = all_required_field_names.intersection(extra_kwargs.keys()) + if invalid_model_kwargs: + raise ValueError( + f"Parameters {invalid_model_kwargs} should be specified explicitly. " + "Instead they were passed in as part of `model_kwargs` parameter." ) - self.anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") - def _default_params(self): + return extra_kwargs + + +def convert_to_secret_str(value: Union[SecretStr, str]) -> SecretStr: + """Convert a string to a SecretStr if needed.""" + if isinstance(value, SecretStr): + return value + return SecretStr(value) + + +class _AnthropicCommon(BaseLanguageModel): + client: Any = None #: :meta private: + async_client: Any = None #: :meta private: + model: str = Field(default="claude-2", alias="model_name") + """Model name to use.""" + + max_tokens_to_sample: int = Field(default=256, alias="max_tokens") + """Denotes the number of tokens to predict per generation.""" + + temperature: Optional[float] = None + """A non-negative float that tunes the degree of randomness in generation.""" + + top_k: Optional[int] = None + """Number of most likely tokens to consider at each step.""" + + top_p: Optional[float] = None + """Total probability mass of tokens to consider at each step.""" + + streaming: bool = False + """Whether to stream the results.""" + + default_request_timeout: Optional[float] = None + """Timeout for requests to Anthropic Completion API. Default is 600 seconds.""" + + anthropic_api_url: Optional[str] = None + + anthropic_api_key: Optional[SecretStr] = None + + HUMAN_PROMPT: Optional[str] = None + AI_PROMPT: Optional[str] = None + count_tokens: Optional[Callable[[str], int]] = None + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + + @root_validator(pre=True) + def build_extra(cls, values: Dict) -> Dict: + extra = values.get("model_kwargs", {}) + all_required_field_names = get_pydantic_field_names(cls) + values["model_kwargs"] = build_extra_kwargs( + extra, values, all_required_field_names + ) + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + values["anthropic_api_key"] = convert_to_secret_str( + get_from_dict_or_env(values, "anthropic_api_key", "ANTHROPIC_API_KEY") + ) + # Get custom api url from environment. 
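+        # Falls back to the public https://api.anthropic.com endpoint when
+        # neither the field nor ANTHROPIC_API_URL is set.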
+ values["anthropic_api_url"] = get_from_dict_or_env( + values, + "anthropic_api_url", + "ANTHROPIC_API_URL", + default="https://api.anthropic.com", + ) + + try: + import anthropic + + check_package_version("anthropic", gte_version="0.3") + values["client"] = anthropic.Anthropic( + base_url=values["anthropic_api_url"], + api_key=values["anthropic_api_key"].get_secret_value(), + timeout=values["default_request_timeout"], + ) + values["async_client"] = anthropic.AsyncAnthropic( + base_url=values["anthropic_api_url"], + api_key=values["anthropic_api_key"].get_secret_value(), + timeout=values["default_request_timeout"], + ) + values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT + values["AI_PROMPT"] = anthropic.AI_PROMPT + values["count_tokens"] = values["client"].count_tokens + + except ImportError: + raise ImportError( + "Could not import anthropic python package. " + "Please it install it with `pip install anthropic`." + ) + return values + + @property + def _default_params(self) -> Mapping[str, Any]: """Get the default parameters for calling Anthropic API.""" d = { "max_tokens_to_sample": self.max_tokens_to_sample, @@ -47,32 +298,229 @@ class Anthropic: d["top_k"] = self.top_k if self.top_p is not None: d["top_p"] = self.top_p - return d - - def run(self, task: str, stop=None): - """Call out to Anthropic's completion endpoint.""" - stop = stop or [] - params = self._default_params() - headers = {"Authorization": f"Bearer {self.anthropic_api_key}"} - data = {"prompt": task, "stop_sequences": stop, **params} - response = requests.post( - f"{self.anthropic_api_url}/completions", - headers=headers, - json=data, - timeout=self.default_request_timeout, + return {**d, **self.model_kwargs} + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{}, **self._default_params} + + def _get_anthropic_stop(self, stop: Optional[List[str]] = None) -> List[str]: + if not self.HUMAN_PROMPT or not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if stop is None: + stop = [] + + # Never want model to invent new turns of Human / Assistant dialog. + stop.extend([self.HUMAN_PROMPT]) + + return stop + + +class Anthropic(LLM, _AnthropicCommon): + """Anthropic large language models. + + To use, you should have the ``anthropic`` python package installed, and the + environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass + it as a named parameter to the constructor. + + Example: + .. code-block:: python + + import anthropic + from langchain.llms import Anthropic + + model = Anthropic(model="", anthropic_api_key="my-api-key") + + # Simplest invocation, automatically wrapped with HUMAN_PROMPT + # and AI_PROMPT. + response = model("What are the biggest risks facing humanity?") + + # Or if you want to use the chat mode, build a few-shot-prompt, or + # put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT: + raw_prompt = "What are the biggest risks facing humanity?" + prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" + response = model(prompt) + """ + + class Config: + """Configuration for this pydantic object.""" + + allow_population_by_field_name = True + arbitrary_types_allowed = True + + @root_validator() + def raise_warning(cls, values: Dict) -> Dict: + """Raise warning that this class is deprecated.""" + warnings.warn( + "This Anthropic LLM is deprecated. 
" + "Please use `from langchain.chat_models import ChatAnthropic` instead" + ) + return values + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "anthropic-llm" + + def _wrap_prompt(self, prompt: str) -> str: + if not self.HUMAN_PROMPT or not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if prompt.startswith(self.HUMAN_PROMPT): + return prompt # Already wrapped. + + # Guard against common errors in specifying wrong number of newlines. + corrected_prompt, n_subs = re.subn(r"^\n*Human:", self.HUMAN_PROMPT, prompt) + if n_subs == 1: + return corrected_prompt + + # As a last resort, wrap the prompt ourselves to emulate instruct-style. + return f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT} Sure, here you go:\n" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + r"""Call out to Anthropic's completion endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + prompt = "What are the biggest risks facing humanity?" + prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + response = model(prompt) + + """ + if self.streaming: + completion = "" + for chunk in self._stream( + prompt=prompt, stop=stop, run_manager=run_manager, **kwargs + ): + completion += chunk.text + return completion + + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + response = self.client.completions.create( + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + **params, ) - return response.json().get("completion") - - def __call__(self, task: str, stop=None): - """Call out to Anthropic's completion endpoint.""" - stop = stop or [] - params = self._default_params() - headers = {"Authorization": f"Bearer {self.anthropic_api_key}"} - data = {"prompt": task, "stop_sequences": stop, **params} - response = requests.post( - f"{self.anthropic_api_url}/completions", - headers=headers, - json=data, - timeout=self.default_request_timeout, + return response.completion + + def convert_prompt(self, prompt: PromptValue) -> str: + return self._wrap_prompt(prompt.to_string()) + + async def _acall( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Call out to Anthropic's completion endpoint asynchronously.""" + if self.streaming: + completion = "" + async for chunk in self._astream( + prompt=prompt, stop=stop, run_manager=run_manager, **kwargs + ): + completion += chunk.text + return completion + + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + + response = await self.async_client.completions.create( + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + **params, ) - return response.json().get("completion") + return response.completion + + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + r"""Call Anthropic completion_stream and return the resulting generator. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + Returns: + A generator representing the stream of tokens from Anthropic. + Example: + .. 
code-block:: python + + prompt = "Write a poem about a stream." + prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + generator = anthropic.stream(prompt) + for token in generator: + yield token + """ + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + + for token in self.client.completions.create( + prompt=self._wrap_prompt(prompt), stop_sequences=stop, stream=True, **params + ): + chunk = GenerationChunk(text=token.completion) + yield chunk + if run_manager: + run_manager.on_llm_new_token(chunk.text, chunk=chunk) + + async def _astream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + r"""Call Anthropic completion_stream and return the resulting generator. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + Returns: + A generator representing the stream of tokens from Anthropic. + Example: + .. code-block:: python + prompt = "Write a poem about a stream." + prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + generator = anthropic.stream(prompt) + for token in generator: + yield token + """ + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + + async for token in await self.async_client.completions.create( + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + stream=True, + **params, + ): + chunk = GenerationChunk(text=token.completion) + yield chunk + if run_manager: + await run_manager.on_llm_new_token(chunk.text, chunk=chunk) + + def get_num_tokens(self, text: str) -> int: + """Calculate number of tokens.""" + if not self.count_tokens: + raise NameError("Please ensure the anthropic package is loaded") + return self.count_tokens(text) diff --git a/swarms/models/bioclip.py b/swarms/models/bioclip.py index 318de290..c2b4bfa5 100644 --- a/swarms/models/bioclip.py +++ b/swarms/models/bioclip.py @@ -3,7 +3,7 @@ BiomedCLIP-PubMedBERT_256-vit_base_patch16_224 https://huggingface.co/microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224 -BiomedCLIP is a biomedical vision-language foundation model that is pretrained on PMC-15M, +BiomedCLIP is a biomedical vision-language foundation model that is pretrained on PMC-15M, a dataset of 15 million figure-caption pairs extracted from biomedical research articles in PubMed Central, using contrastive learning. It uses PubMedBERT as the text encoder and Vision Transformer as the image encoder, with domain-specific adaptations. It can perform various vision-language processing (VLP) tasks such as cross-modal retrieval, image classification, and visual question answering. BiomedCLIP establishes new state of the art in a wide range of standard datasets, and substantially outperforms prior VLP approaches: diff --git a/swarms/models/biogpt.py b/swarms/models/biogpt.py index f5abdf95..83c31e55 100644 --- a/swarms/models/biogpt.py +++ b/swarms/models/biogpt.py @@ -1,18 +1,18 @@ """ BioGPT -Pre-trained language models have attracted increasing attention in the biomedical domain, -inspired by their great success in the general natural language domain. -Among the two main branches of pre-trained language models in the general language domain, i.e. BERT (and its variants) and GPT (and its variants), -the first one has been extensively studied in the biomedical domain, such as BioBERT and PubMedBERT. 
-While they have achieved great success on a variety of discriminative downstream biomedical tasks, -the lack of generation ability constrains their application scope. -In this paper, we propose BioGPT, a domain-specific generative Transformer language model -pre-trained on large-scale biomedical literature. -We evaluate BioGPT on six biomedical natural language processing tasks -and demonstrate that our model outperforms previous models on most tasks. -Especially, we get 44.98%, 38.42% and 40.76% F1 score on BC5CDR, KD-DTI and DDI -end-to-end relation extraction tasks, respectively, and 78.2% accuracy on PubMedQA, -creating a new record. Our case study on text generation further demonstrates the +Pre-trained language models have attracted increasing attention in the biomedical domain, +inspired by their great success in the general natural language domain. +Among the two main branches of pre-trained language models in the general language domain, i.e. BERT (and its variants) and GPT (and its variants), +the first one has been extensively studied in the biomedical domain, such as BioBERT and PubMedBERT. +While they have achieved great success on a variety of discriminative downstream biomedical tasks, +the lack of generation ability constrains their application scope. +In this paper, we propose BioGPT, a domain-specific generative Transformer language model +pre-trained on large-scale biomedical literature. +We evaluate BioGPT on six biomedical natural language processing tasks +and demonstrate that our model outperforms previous models on most tasks. +Especially, we get 44.98%, 38.42% and 40.76% F1 score on BC5CDR, KD-DTI and DDI +end-to-end relation extraction tasks, respectively, and 78.2% accuracy on PubMedQA, +creating a new record. Our case study on text generation further demonstrates the advantage of BioGPT on biomedical literature to generate fluent descriptions for biomedical terms. @@ -105,13 +105,15 @@ class BioGPT: generator = pipeline( "text-generation", model=self.model, tokenizer=self.tokenizer ) - return generator( + out = generator( text, max_length=self.max_length, num_return_sequences=self.num_return_sequences, do_sample=self.do_sample, ) + return out[0]["generated_text"] + def get_features(self, text): """ Get the features of a given text. 
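The BioGPT hunk above changes the model's call path so that it unwraps the text-generation pipeline output and returns the generated string itself rather than the raw list of dicts. A minimal usage sketch under that assumption (the import path follows this repo's layout; the prompt, the default constructor arguments, and the assumption that the edited method is __call__ are illustrative, not taken from this patch):

# Hedged sketch, not part of the patch: assumes BioGPT() is constructible with
# its defaults and that the method edited in the hunk above is __call__.
from swarms.models.biogpt import BioGPT

model = BioGPT()
text = model("COVID-19 is")  # illustrative prompt
print(text)  # now a single generated string, i.e. out[0]["generated_text"]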
diff --git a/swarms/models/dalle3.py b/swarms/models/dalle3.py new file mode 100644 index 00000000..c24f262d --- /dev/null +++ b/swarms/models/dalle3.py @@ -0,0 +1,178 @@ +import logging +import os +from dataclasses import dataclass +from io import BytesIO + +import openai +from dotenv import load_dotenv +from openai import OpenAI +from PIL import Image +from pydantic import validator +from termcolor import colored + +load_dotenv() + +# api_key = os.getenv("OPENAI_API_KEY") + +# Configure Logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@dataclass +class Dalle3: + """ + Dalle3 model class + + Attributes: + ----------- + image_url: str + The image url generated by the Dalle3 API + + Methods: + -------- + __call__(self, task: str) -> Dalle3: + Makes a call to the Dalle3 API and returns the image url + + Example: + -------- + >>> dalle3 = Dalle3() + >>> task = "A painting of a dog" + >>> image_url = dalle3(task) + >>> print(image_url) + https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png + + """ + + model: str = "dall-e-3" + img: str = None + size: str = "1024x1024" + max_retries: int = 3 + quality: str = "standard" + api_key: str = None + n: int = 4 + client = OpenAI( + api_key=api_key, + max_retries=max_retries, + ) + + class Config: + """Config class for the Dalle3 model""" + + arbitrary_types_allowed = True + + @validator("max_retries", "time_seconds") + def must_be_positive(cls, value): + if value <= 0: + raise ValueError("Must be positive") + return value + + def read_img(self, img: str): + """Read the image using pil""" + img = Image.open(img) + return img + + def set_width_height(self, img: str, width: int, height: int): + """Set the width and height of the image""" + img = self.read_img(img) + img = img.resize((width, height)) + return img + + def convert_to_bytesio(self, img: str, format: str = "PNG"): + """Convert the image to an bytes io object""" + byte_stream = BytesIO() + img.save(byte_stream, format=format) + byte_array = byte_stream.getvalue() + return byte_array + + # @lru_cache(maxsize=32) + def __call__(self, task: str): + """ + Text to image conversion using the Dalle3 API + + Parameters: + ----------- + task: str + The task to be converted to an image + + Returns: + -------- + Dalle3: + An instance of the Dalle3 class with the image url generated by the Dalle3 API + + Example: + -------- + >>> dalle3 = Dalle3() + >>> task = "A painting of a dog" + >>> image_url = dalle3(task) + >>> print(image_url) + https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png + """ + try: + # Making a call to the the Dalle3 API + response = self.client.images.generate( + model=self.model, + prompt=task, + size=self.size, + quality=self.quality, + n=self.n, + ) + # Extracting the image url from the response + img = response.data[0].url + return img + except openai.OpenAIError as error: + # Handling exceptions and printing the errors details + print( + colored( + ( + f"Error running Dalle3: {error} try optimizing your api key and" + " or try again" + ), + "red", + ) + ) + raise error + + def create_variations(self, img: str): + """ + Create variations of an image using the Dalle3 API + + Parameters: + ----------- + img: str + The image to be used for the API request + + Returns: + -------- + img: str + The image url generated by the Dalle3 API + + Example: + -------- + >>> dalle3 = Dalle3() + >>> img = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + >>> img = dalle3.create_variations(img) + >>> 
print(img) + + + """ + try: + response = self.client.images.create_variation( + img=open(img, "rb"), n=self.n, size=self.size + ) + img = response.data[0].url + + return img + except (Exception, openai.OpenAIError) as error: + print( + colored( + ( + f"Error running Dalle3: {error} try optimizing your api key and" + " or try again" + ), + "red", + ) + ) + print(colored(f"Error running Dalle3: {error.http_status}", "red")) + print(colored(f"Error running Dalle3: {error.error}", "red")) + raise error diff --git a/swarms/models/distilled_whisperx.py b/swarms/models/distilled_whisperx.py index 2eb2788d..0a60aaac 100644 --- a/swarms/models/distilled_whisperx.py +++ b/swarms/models/distilled_whisperx.py @@ -1,3 +1,160 @@ -""" +import asyncio +import os +import time +from functools import wraps +from typing import Union -""" \ No newline at end of file +import torch +from termcolor import colored +from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline + + +def async_retry(max_retries=3, exceptions=(Exception,), delay=1): + """ + A decorator for adding retry logic to async functions. + :param max_retries: Maximum number of retries before giving up. + :param exceptions: A tuple of exceptions to catch and retry on. + :param delay: Delay between retries. + """ + + def decorator(func): + @wraps(func) + async def wrapper(*args, **kwargs): + retries = max_retries + while retries: + try: + return await func(*args, **kwargs) + except exceptions as e: + retries -= 1 + if retries <= 0: + raise + print(f"Retry after exception: {e}, Attempts remaining: {retries}") + await asyncio.sleep(delay) + + return wrapper + + return decorator + + +class DistilWhisperModel: + """ + This class encapsulates the Distil-Whisper model for English speech recognition. + It allows for both synchronous and asynchronous transcription of short and long-form audio. + + Args: + model_id: The model ID to use. Defaults to "distil-whisper/distil-large-v2". + + + Attributes: + device: The device to use for inference. + torch_dtype: The torch data type to use for inference. + model_id: The model ID to use. + model: The model instance. + processor: The processor instance. + + Usage: + model_wrapper = DistilWhisperModel() + transcription = model_wrapper('path/to/audio.mp3') + + # For async usage + transcription = asyncio.run(model_wrapper.async_transcribe('path/to/audio.mp3')) + """ + + def __init__(self, model_id="distil-whisper/distil-large-v2"): + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + self.torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 + self.model_id = model_id + self.model = AutoModelForSpeechSeq2Seq.from_pretrained( + model_id, + torch_dtype=self.torch_dtype, + low_cpu_mem_usage=True, + use_safetensors=True, + ).to(self.device) + self.processor = AutoProcessor.from_pretrained(model_id) + + def __call__(self, inputs: Union[str, dict]): + return self.transcribe(inputs) + + def transcribe(self, inputs: Union[str, dict]): + """ + Synchronously transcribe the given audio input using the Distil-Whisper model. + :param inputs: A string representing the file path or a dict with audio data. + :return: The transcribed text. 
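+        Example (illustrative; the audio path is a placeholder):
+            model = DistilWhisperModel()
+            text = model.transcribe("path/to/audio.mp3")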
+ """ + pipe = pipeline( + "automatic-speech-recognition", + model=self.model, + tokenizer=self.processor.tokenizer, + feature_extractor=self.processor.feature_extractor, + max_new_tokens=128, + torch_dtype=self.torch_dtype, + device=self.device, + ) + + return pipe(inputs)["text"] + + @async_retry() + async def async_transcribe(self, inputs: Union[str, dict]): + """ + Asynchronously transcribe the given audio input using the Distil-Whisper model. + :param inputs: A string representing the file path or a dict with audio data. + :return: The transcribed text. + """ + loop = asyncio.get_event_loop() + return await loop.run_in_executor(None, self.transcribe, inputs) + + def real_time_transcribe(self, audio_file_path, chunk_duration=5): + """ + Simulates real-time transcription of an audio file, processing and printing results + in chunks with colored output for readability. + + :param audio_file_path: Path to the audio file to be transcribed. + :param chunk_duration: Duration in seconds of each audio chunk to be processed. + """ + if not os.path.isfile(audio_file_path): + print(colored("The audio file was not found.", "red")) + return + + # Assuming `chunk_duration` is in seconds and `processor` can handle chunk-wise processing + try: + with torch.no_grad(): + # Load the whole audio file, but process and transcribe it in chunks + audio_input = self.processor.audio_file_to_array(audio_file_path) + sample_rate = audio_input.sampling_rate + total_duration = len(audio_input.array) / sample_rate + chunks = [ + audio_input.array[i : i + sample_rate * chunk_duration] + for i in range( + 0, len(audio_input.array), sample_rate * chunk_duration + ) + ] + + print(colored("Starting real-time transcription...", "green")) + + for i, chunk in enumerate(chunks): + # Process the current chunk + processed_inputs = self.processor( + chunk, + sampling_rate=sample_rate, + return_tensors="pt", + padding=True, + ) + processed_inputs = processed_inputs.input_values.to(self.device) + + # Generate transcription for the chunk + logits = self.model.generate(processed_inputs) + transcription = self.processor.batch_decode( + logits, skip_special_tokens=True + )[0] + + # Print the chunk's transcription + print( + colored(f"Chunk {i+1}/{len(chunks)}: ", "yellow") + + transcription + ) + + # Wait for the chunk's duration to simulate real-time processing + time.sleep(chunk_duration) + + except Exception as e: + print(colored(f"An error occurred during transcription: {e}", "red")) diff --git a/swarms/models/fast_vit_classes.json b/swarms/models/fast_vit_classes.json new file mode 100644 index 00000000..57434253 --- /dev/null +++ b/swarms/models/fast_vit_classes.json @@ -0,0 +1,1000 @@ +["tench", +"goldfish", +"great white shark", +"tiger shark", +"hammerhead shark", +"electric ray", +"stingray", +"cock", +"hen", +"ostrich", +"brambling", +"goldfinch", +"house finch", +"junco", +"indigo bunting", +"American robin", +"bulbul", +"jay", +"magpie", +"chickadee", +"American dipper", +"kite", +"bald eagle", +"vulture", +"great grey owl", +"fire salamander", +"smooth newt", +"newt", +"spotted salamander", +"axolotl", +"American bullfrog", +"tree frog", +"tailed frog", +"loggerhead sea turtle", +"leatherback sea turtle", +"mud turtle", +"terrapin", +"box turtle", +"banded gecko", +"green iguana", +"Carolina anole", +"desert grassland whiptail lizard", +"agama", +"frilled-necked lizard", +"alligator lizard", +"Gila monster", +"European green lizard", +"chameleon", +"Komodo dragon", +"Nile crocodile", +"American alligator", 
+"triceratops", +"worm snake", +"ring-necked snake", +"eastern hog-nosed snake", +"smooth green snake", +"kingsnake", +"garter snake", +"water snake", +"vine snake", +"night snake", +"boa constrictor", +"African rock python", +"Indian cobra", +"green mamba", +"sea snake", +"Saharan horned viper", +"eastern diamondback rattlesnake", +"sidewinder", +"trilobite", +"harvestman", +"scorpion", +"yellow garden spider", +"barn spider", +"European garden spider", +"southern black widow", +"tarantula", +"wolf spider", +"tick", +"centipede", +"black grouse", +"ptarmigan", +"ruffed grouse", +"prairie grouse", +"peacock", +"quail", +"partridge", +"grey parrot", +"macaw", +"sulphur-crested cockatoo", +"lorikeet", +"coucal", +"bee eater", +"hornbill", +"hummingbird", +"jacamar", +"toucan", +"duck", +"red-breasted merganser", +"goose", +"black swan", +"tusker", +"echidna", +"platypus", +"wallaby", +"koala", +"wombat", +"jellyfish", +"sea anemone", +"brain coral", +"flatworm", +"nematode", +"conch", +"snail", +"slug", +"sea slug", +"chiton", +"chambered nautilus", +"Dungeness crab", +"rock crab", +"fiddler crab", +"red king crab", +"American lobster", +"spiny lobster", +"crayfish", +"hermit crab", +"isopod", +"white stork", +"black stork", +"spoonbill", +"flamingo", +"little blue heron", +"great egret", +"bittern", +"crane (bird)", +"limpkin", +"common gallinule", +"American coot", +"bustard", +"ruddy turnstone", +"dunlin", +"common redshank", +"dowitcher", +"oystercatcher", +"pelican", +"king penguin", +"albatross", +"grey whale", +"killer whale", +"dugong", +"sea lion", +"Chihuahua", +"Japanese Chin", +"Maltese", +"Pekingese", +"Shih Tzu", +"King Charles Spaniel", +"Papillon", +"toy terrier", +"Rhodesian Ridgeback", +"Afghan Hound", +"Basset Hound", +"Beagle", +"Bloodhound", +"Bluetick Coonhound", +"Black and Tan Coonhound", +"Treeing Walker Coonhound", +"English foxhound", +"Redbone Coonhound", +"borzoi", +"Irish Wolfhound", +"Italian Greyhound", +"Whippet", +"Ibizan Hound", +"Norwegian Elkhound", +"Otterhound", +"Saluki", +"Scottish Deerhound", +"Weimaraner", +"Staffordshire Bull Terrier", +"American Staffordshire Terrier", +"Bedlington Terrier", +"Border Terrier", +"Kerry Blue Terrier", +"Irish Terrier", +"Norfolk Terrier", +"Norwich Terrier", +"Yorkshire Terrier", +"Wire Fox Terrier", +"Lakeland Terrier", +"Sealyham Terrier", +"Airedale Terrier", +"Cairn Terrier", +"Australian Terrier", +"Dandie Dinmont Terrier", +"Boston Terrier", +"Miniature Schnauzer", +"Giant Schnauzer", +"Standard Schnauzer", +"Scottish Terrier", +"Tibetan Terrier", +"Australian Silky Terrier", +"Soft-coated Wheaten Terrier", +"West Highland White Terrier", +"Lhasa Apso", +"Flat-Coated Retriever", +"Curly-coated Retriever", +"Golden Retriever", +"Labrador Retriever", +"Chesapeake Bay Retriever", +"German Shorthaired Pointer", +"Vizsla", +"English Setter", +"Irish Setter", +"Gordon Setter", +"Brittany Spaniel", +"Clumber Spaniel", +"English Springer Spaniel", +"Welsh Springer Spaniel", +"Cocker Spaniels", +"Sussex Spaniel", +"Irish Water Spaniel", +"Kuvasz", +"Schipperke", +"Groenendael", +"Malinois", +"Briard", +"Australian Kelpie", +"Komondor", +"Old English Sheepdog", +"Shetland Sheepdog", +"collie", +"Border Collie", +"Bouvier des Flandres", +"Rottweiler", +"German Shepherd Dog", +"Dobermann", +"Miniature Pinscher", +"Greater Swiss Mountain Dog", +"Bernese Mountain Dog", +"Appenzeller Sennenhund", +"Entlebucher Sennenhund", +"Boxer", +"Bullmastiff", +"Tibetan Mastiff", +"French Bulldog", +"Great Dane", +"St. 
Bernard", +"husky", +"Alaskan Malamute", +"Siberian Husky", +"Dalmatian", +"Affenpinscher", +"Basenji", +"pug", +"Leonberger", +"Newfoundland", +"Pyrenean Mountain Dog", +"Samoyed", +"Pomeranian", +"Chow Chow", +"Keeshond", +"Griffon Bruxellois", +"Pembroke Welsh Corgi", +"Cardigan Welsh Corgi", +"Toy Poodle", +"Miniature Poodle", +"Standard Poodle", +"Mexican hairless dog", +"grey wolf", +"Alaskan tundra wolf", +"red wolf", +"coyote", +"dingo", +"dhole", +"African wild dog", +"hyena", +"red fox", +"kit fox", +"Arctic fox", +"grey fox", +"tabby cat", +"tiger cat", +"Persian cat", +"Siamese cat", +"Egyptian Mau", +"cougar", +"lynx", +"leopard", +"snow leopard", +"jaguar", +"lion", +"tiger", +"cheetah", +"brown bear", +"American black bear", +"polar bear", +"sloth bear", +"mongoose", +"meerkat", +"tiger beetle", +"ladybug", +"ground beetle", +"longhorn beetle", +"leaf beetle", +"dung beetle", +"rhinoceros beetle", +"weevil", +"fly", +"bee", +"ant", +"grasshopper", +"cricket", +"stick insect", +"cockroach", +"mantis", +"cicada", +"leafhopper", +"lacewing", +"dragonfly", +"damselfly", +"red admiral", +"ringlet", +"monarch butterfly", +"small white", +"sulphur butterfly", +"gossamer-winged butterfly", +"starfish", +"sea urchin", +"sea cucumber", +"cottontail rabbit", +"hare", +"Angora rabbit", +"hamster", +"porcupine", +"fox squirrel", +"marmot", +"beaver", +"guinea pig", +"common sorrel", +"zebra", +"pig", +"wild boar", +"warthog", +"hippopotamus", +"ox", +"water buffalo", +"bison", +"ram", +"bighorn sheep", +"Alpine ibex", +"hartebeest", +"impala", +"gazelle", +"dromedary", +"llama", +"weasel", +"mink", +"European polecat", +"black-footed ferret", +"otter", +"skunk", +"badger", +"armadillo", +"three-toed sloth", +"orangutan", +"gorilla", +"chimpanzee", +"gibbon", +"siamang", +"guenon", +"patas monkey", +"baboon", +"macaque", +"langur", +"black-and-white colobus", +"proboscis monkey", +"marmoset", +"white-headed capuchin", +"howler monkey", +"titi", +"Geoffroy's spider monkey", +"common squirrel monkey", +"ring-tailed lemur", +"indri", +"Asian elephant", +"African bush elephant", +"red panda", +"giant panda", +"snoek", +"eel", +"coho salmon", +"rock beauty", +"clownfish", +"sturgeon", +"garfish", +"lionfish", +"pufferfish", +"abacus", +"abaya", +"academic gown", +"accordion", +"acoustic guitar", +"aircraft carrier", +"airliner", +"airship", +"altar", +"ambulance", +"amphibious vehicle", +"analog clock", +"apiary", +"apron", +"waste container", +"assault rifle", +"backpack", +"bakery", +"balance beam", +"balloon", +"ballpoint pen", +"Band-Aid", +"banjo", +"baluster", +"barbell", +"barber chair", +"barbershop", +"barn", +"barometer", +"barrel", +"wheelbarrow", +"baseball", +"basketball", +"bassinet", +"bassoon", +"swimming cap", +"bath towel", +"bathtub", +"station wagon", +"lighthouse", +"beaker", +"military cap", +"beer bottle", +"beer glass", +"bell-cot", +"bib", +"tandem bicycle", +"bikini", +"ring binder", +"binoculars", +"birdhouse", +"boathouse", +"bobsleigh", +"bolo tie", +"poke bonnet", +"bookcase", +"bookstore", +"bottle cap", +"bow", +"bow tie", +"brass", +"bra", +"breakwater", +"breastplate", +"broom", +"bucket", +"buckle", +"bulletproof vest", +"high-speed train", +"butcher shop", +"taxicab", +"cauldron", +"candle", +"cannon", +"canoe", +"can opener", +"cardigan", +"car mirror", +"carousel", +"tool kit", +"carton", +"car wheel", +"automated teller machine", +"cassette", +"cassette player", +"castle", +"catamaran", +"CD player", +"cello", +"mobile phone", +"chain", +"chain-link 
fence", +"chain mail", +"chainsaw", +"chest", +"chiffonier", +"chime", +"china cabinet", +"Christmas stocking", +"church", +"movie theater", +"cleaver", +"cliff dwelling", +"cloak", +"clogs", +"cocktail shaker", +"coffee mug", +"coffeemaker", +"coil", +"combination lock", +"computer keyboard", +"confectionery store", +"container ship", +"convertible", +"corkscrew", +"cornet", +"cowboy boot", +"cowboy hat", +"cradle", +"crane (machine)", +"crash helmet", +"crate", +"infant bed", +"Crock Pot", +"croquet ball", +"crutch", +"cuirass", +"dam", +"desk", +"desktop computer", +"rotary dial telephone", +"diaper", +"digital clock", +"digital watch", +"dining table", +"dishcloth", +"dishwasher", +"disc brake", +"dock", +"dog sled", +"dome", +"doormat", +"drilling rig", +"drum", +"drumstick", +"dumbbell", +"Dutch oven", +"electric fan", +"electric guitar", +"electric locomotive", +"entertainment center", +"envelope", +"espresso machine", +"face powder", +"feather boa", +"filing cabinet", +"fireboat", +"fire engine", +"fire screen sheet", +"flagpole", +"flute", +"folding chair", +"football helmet", +"forklift", +"fountain", +"fountain pen", +"four-poster bed", +"freight car", +"French horn", +"frying pan", +"fur coat", +"garbage truck", +"gas mask", +"gas pump", +"goblet", +"go-kart", +"golf ball", +"golf cart", +"gondola", +"gong", +"gown", +"grand piano", +"greenhouse", +"grille", +"grocery store", +"guillotine", +"barrette", +"hair spray", +"half-track", +"hammer", +"hamper", +"hair dryer", +"hand-held computer", +"handkerchief", +"hard disk drive", +"harmonica", +"harp", +"harvester", +"hatchet", +"holster", +"home theater", +"honeycomb", +"hook", +"hoop skirt", +"horizontal bar", +"horse-drawn vehicle", +"hourglass", +"iPod", +"clothes iron", +"jack-o'-lantern", +"jeans", +"jeep", +"T-shirt", +"jigsaw puzzle", +"pulled rickshaw", +"joystick", +"kimono", +"knee pad", +"knot", +"lab coat", +"ladle", +"lampshade", +"laptop computer", +"lawn mower", +"lens cap", +"paper knife", +"library", +"lifeboat", +"lighter", +"limousine", +"ocean liner", +"lipstick", +"slip-on shoe", +"lotion", +"speaker", +"loupe", +"sawmill", +"magnetic compass", +"mail bag", +"mailbox", +"tights", +"tank suit", +"manhole cover", +"maraca", +"marimba", +"mask", +"match", +"maypole", +"maze", +"measuring cup", +"medicine chest", +"megalith", +"microphone", +"microwave oven", +"military uniform", +"milk can", +"minibus", +"miniskirt", +"minivan", +"missile", +"mitten", +"mixing bowl", +"mobile home", +"Model T", +"modem", +"monastery", +"monitor", +"moped", +"mortar", +"square academic cap", +"mosque", +"mosquito net", +"scooter", +"mountain bike", +"tent", +"computer mouse", +"mousetrap", +"moving van", +"muzzle", +"nail", +"neck brace", +"necklace", +"nipple", +"notebook computer", +"obelisk", +"oboe", +"ocarina", +"odometer", +"oil filter", +"organ", +"oscilloscope", +"overskirt", +"bullock cart", +"oxygen mask", +"packet", +"paddle", +"paddle wheel", +"padlock", +"paintbrush", +"pajamas", +"palace", +"pan flute", +"paper towel", +"parachute", +"parallel bars", +"park bench", +"parking meter", +"passenger car", +"patio", +"payphone", +"pedestal", +"pencil case", +"pencil sharpener", +"perfume", +"Petri dish", +"photocopier", +"plectrum", +"Pickelhaube", +"picket fence", +"pickup truck", +"pier", +"piggy bank", +"pill bottle", +"pillow", +"ping-pong ball", +"pinwheel", +"pirate ship", +"pitcher", +"hand plane", +"planetarium", +"plastic bag", +"plate rack", +"plow", +"plunger", +"Polaroid camera", +"pole", +"police van", 
+"poncho", +"billiard table", +"soda bottle", +"pot", +"potter's wheel", +"power drill", +"prayer rug", +"printer", +"prison", +"projectile", +"projector", +"hockey puck", +"punching bag", +"purse", +"quill", +"quilt", +"race car", +"racket", +"radiator", +"radio", +"radio telescope", +"rain barrel", +"recreational vehicle", +"reel", +"reflex camera", +"refrigerator", +"remote control", +"restaurant", +"revolver", +"rifle", +"rocking chair", +"rotisserie", +"eraser", +"rugby ball", +"ruler", +"running shoe", +"safe", +"safety pin", +"salt shaker", +"sandal", +"sarong", +"saxophone", +"scabbard", +"weighing scale", +"school bus", +"schooner", +"scoreboard", +"CRT screen", +"screw", +"screwdriver", +"seat belt", +"sewing machine", +"shield", +"shoe store", +"shoji", +"shopping basket", +"shopping cart", +"shovel", +"shower cap", +"shower curtain", +"ski", +"ski mask", +"sleeping bag", +"slide rule", +"sliding door", +"slot machine", +"snorkel", +"snowmobile", +"snowplow", +"soap dispenser", +"soccer ball", +"sock", +"solar thermal collector", +"sombrero", +"soup bowl", +"space bar", +"space heater", +"space shuttle", +"spatula", +"motorboat", +"spider web", +"spindle", +"sports car", +"spotlight", +"stage", +"steam locomotive", +"through arch bridge", +"steel drum", +"stethoscope", +"scarf", +"stone wall", +"stopwatch", +"stove", +"strainer", +"tram", +"stretcher", +"couch", +"stupa", +"submarine", +"suit", +"sundial", +"sunglass", +"sunglasses", +"sunscreen", +"suspension bridge", +"mop", +"sweatshirt", +"swimsuit", +"swing", +"switch", +"syringe", +"table lamp", +"tank", +"tape player", +"teapot", +"teddy bear", +"television", +"tennis ball", +"thatched roof", +"front curtain", +"thimble", +"threshing machine", +"throne", +"tile roof", +"toaster", +"tobacco shop", +"toilet seat", +"torch", +"totem pole", +"tow truck", +"toy store", +"tractor", +"semi-trailer truck", +"tray", +"trench coat", +"tricycle", +"trimaran", +"tripod", +"triumphal arch", +"trolleybus", +"trombone", +"tub", +"turnstile", +"typewriter keyboard", +"umbrella", +"unicycle", +"upright piano", +"vacuum cleaner", +"vase", +"vault", +"velvet", +"vending machine", +"vestment", +"viaduct", +"violin", +"volleyball", +"waffle iron", +"wall clock", +"wallet", +"wardrobe", +"military aircraft", +"sink", +"washing machine", +"water bottle", +"water jug", +"water tower", +"whiskey jug", +"whistle", +"wig", +"window screen", +"window shade", +"Windsor tie", +"wine bottle", +"wing", +"wok", +"wooden spoon", +"wool", +"split-rail fence", +"shipwreck", +"yawl", +"yurt", +"website", +"comic book", +"crossword", +"traffic sign", +"traffic light", +"dust jacket", +"menu", +"plate", +"guacamole", +"consomme", +"hot pot", +"trifle", +"ice cream", +"ice pop", +"baguette", +"bagel", +"pretzel", +"cheeseburger", +"hot dog", +"mashed potato", +"cabbage", +"broccoli", +"cauliflower", +"zucchini", +"spaghetti squash", +"acorn squash", +"butternut squash", +"cucumber", +"artichoke", +"bell pepper", +"cardoon", +"mushroom", +"Granny Smith", +"strawberry", +"orange", +"lemon", +"fig", +"pineapple", +"banana", +"jackfruit", +"custard apple", +"pomegranate", +"hay", +"carbonara", +"chocolate syrup", +"dough", +"meatloaf", +"pizza", +"pot pie", +"burrito", +"red wine", +"espresso", +"cup", +"eggnog", +"alp", +"bubble", +"cliff", +"coral reef", +"geyser", +"lakeshore", +"promontory", +"shoal", +"seashore", +"valley", +"volcano", +"baseball player", +"bridegroom", +"scuba diver", +"rapeseed", +"daisy", +"yellow lady's slipper", +"corn", +"acorn", +"rose 
hip", +"horse chestnut seed", +"coral fungus", +"agaric", +"gyromitra", +"stinkhorn mushroom", +"earth star", +"hen-of-the-woods", +"bolete", +"ear of corn", +"toilet paper"] \ No newline at end of file diff --git a/swarms/models/fastvit.py b/swarms/models/fastvit.py new file mode 100644 index 00000000..a2d6bc0a --- /dev/null +++ b/swarms/models/fastvit.py @@ -0,0 +1,81 @@ +import json +import os +from typing import List + +import numpy as np +import timm +import torch +from PIL import Image +from pydantic import BaseModel, StrictFloat, StrictInt, validator + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Load the classes for image classification +with open(os.path.join(os.path.dirname(__file__), "fast_vit_classes.json")) as f: + FASTVIT_IMAGENET_1K_CLASSES = json.load(f) + + +class ClassificationResult(BaseModel): + class_id: List[StrictInt] + confidence: List[StrictFloat] + + @validator("class_id", "confidence", pre=True, each_item=True) + def check_list_contents(cls, v): + assert isinstance(v, int) or isinstance(v, float), "must be integer or float" + return v + + +class FastViT: + """ + FastViT model for image classification + + Args: + img (str): path to the input image + confidence_threshold (float): confidence threshold for the model's predictions + + Returns: + ClassificationResult: a pydantic BaseModel containing the class ids and confidences of the model's predictions + + + Example: + >>> fastvit = FastViT() + >>> result = fastvit(img="path_to_image.jpg", confidence_threshold=0.5) + + + To use, create a json file called: fast_vit_classes.json + + """ + + def __init__(self): + self.model = timm.create_model( + "hf_hub:timm/fastvit_s12.apple_in1k", pretrained=True + ).to(DEVICE) + data_config = timm.data.resolve_model_data_config(self.model) + self.transforms = timm.data.create_transform(**data_config, is_training=False) + self.model.eval() + + def __call__( + self, img: str, confidence_threshold: float = 0.5 + ) -> ClassificationResult: + """classifies the input image and returns the top k classes and their probabilities""" + img = Image.open(img).convert("RGB") + img_tensor = self.transforms(img).unsqueeze(0).to(DEVICE) + with torch.no_grad(): + output = self.model(img_tensor) + probabilities = torch.nn.functional.softmax(output, dim=1) + + # Get top k classes and their probabilities + top_probs, top_classes = torch.topk( + probabilities, k=FASTVIT_IMAGENET_1K_CLASSES + ) + + # Filter by confidence threshold + mask = top_probs > confidence_threshold + top_probs, top_classes = top_probs[mask], top_classes[mask] + + # Convert to Python lists and map class indices to labels if needed + top_probs = top_probs.cpu().numpy().tolist() + top_classes = top_classes.cpu().numpy().tolist() + # top_class_labels = [FASTVIT_IMAGENET_1K_CLASSES[i] for i in top_classes] # Uncomment if class labels are needed + + return ClassificationResult(class_id=top_classes, confidence=top_probs) diff --git a/swarms/models/fuyu.py b/swarms/models/fuyu.py index e8d16cdf..dd664f51 100644 --- a/swarms/models/fuyu.py +++ b/swarms/models/fuyu.py @@ -1,11 +1,13 @@ -"""Fuyu model by Kye""" +from io import BytesIO + +import requests +from PIL import Image from transformers import ( - FuyuForCausalLM, AutoTokenizer, - FuyuProcessor, + FuyuForCausalLM, FuyuImageProcessor, + FuyuProcessor, ) -from PIL import Image class Fuyu: @@ -27,15 +29,15 @@ class Fuyu: >>> fuyu = Fuyu() >>> fuyu("Hello, my name is", "path/to/image.png") - - """ def __init__( self, pretrained_path: str = "adept/fuyu-8b", - 
device_map: str = "cuda:0", - max_new_tokens: int = 7, + device_map: str = "auto", + max_new_tokens: int = 500, + *args, + **kwargs, ): self.pretrained_path = pretrained_path self.device_map = device_map @@ -44,15 +46,22 @@ class Fuyu: self.tokenizer = AutoTokenizer.from_pretrained(pretrained_path) self.image_processor = FuyuImageProcessor() self.processor = FuyuProcessor( - image_procesor=self.image_processor, tokenizer=self.tokenizer + image_processor=self.image_processor, tokenizer=self.tokenizer, **kwargs ) self.model = FuyuForCausalLM.from_pretrained( - pretrained_path, device_map=device_map + pretrained_path, + device_map=device_map, + **kwargs, ) - def __call__(self, text: str, img_path: str): + def get_img(self, img: str): + """Get the image from the path""" + image_pil = Image.open(img) + return image_pil + + def __call__(self, text: str, img: str): """Call the model with text and img paths""" - image_pil = Image.open(img_path) + image_pil = Image.open(img) model_inputs = self.processor( text=text, images=[image_pil], device=self.device_map ) @@ -60,7 +69,12 @@ class Fuyu: for k, v in model_inputs.items(): model_inputs[k] = v.to(self.device_map) - output = self.model.generate( - **model_inputs, max_new_tokens=self.fmax_new_tokens - ) + output = self.model.generate(**model_inputs, max_new_tokens=self.max_new_tokens) text = self.processor.batch_decode(output[:, -7:], skip_special_tokens=True) + return print(str(text)) + + def get_img_from_web(self, img_url: str): + """Get the image from the web""" + response = requests.get(img_url) + image_pil = Image.open(BytesIO(response.content)) + return image_pil diff --git a/swarms/models/gpt4v.py b/swarms/models/gpt4v.py new file mode 100644 index 00000000..d1d5ce1f --- /dev/null +++ b/swarms/models/gpt4v.py @@ -0,0 +1,257 @@ +import asyncio +import base64 +import concurrent.futures +import re +from dataclasses import dataclass +from typing import List, Optional, Tuple + +import openai +import requests +from cachetools import TTLCache +from dotenv import load_dotenv +from openai import OpenAI +from ratelimit import limits, sleep_and_retry +from termcolor import colored + +# ENV +load_dotenv() + + +@dataclass +class GPT4VisionResponse: + """A response structure for GPT-4""" + + answer: str + + +@dataclass +class GPT4Vision: + """ + GPT4Vision model class + + Attributes: + ----------- + max_retries: int + The maximum number of retries to make to the API + backoff_factor: float + The backoff factor to use for exponential backoff + timeout_seconds: int + The timeout in seconds for the API request + api_key: str + The API key to use for the API request + quality: str + The quality of the image to generate + max_tokens: int + The maximum number of tokens to use for the API request + + Methods: + -------- + process_img(self, img_path: str) -> str: + Processes the image to be used for the API request + run(self, img: Union[str, List[str]], tasks: List[str]) -> GPT4VisionResponse: + Makes a call to the GPT-4 Vision API and returns the image url + + Example: + >>> gpt4vision = GPT4Vision() + >>> img = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + >>> tasks = ["A painting of a dog"] + >>> answer = gpt4vision(img, tasks) + >>> print(answer) + + """ + + max_retries: int = 3 + model: str = "gpt-4-vision-preview" + backoff_factor: float = 2.0 + timeout_seconds: int = 10 + openai_api_key: Optional[str] = None + # 'Low' or 'High' for respesctively fast or high quality, but high more token usage + quality: str = "low" + # Max tokens 
to use for the API request, the maximum might be 3,000 but we don't know + max_tokens: int = 200 + client = OpenAI( + api_key=openai_api_key, + ) + dashboard: bool = True + call_limit: int = 1 + period_seconds: int = 60 + + # Cache for storing API Responses + cache = TTLCache(maxsize=100, ttl=600) # Cache for 10 minutes + + class Config: + """Config class for the GPT4Vision model""" + + arbitary_types_allowed = True + + def process_img(self, img: str) -> str: + """Processes the image to be used for the API request""" + with open(img, "rb") as image_file: + return base64.b64encode(image_file.read()).decode("utf-8") + + @sleep_and_retry + @limits( + calls=call_limit, period=period_seconds + ) # Rate limit of 10 calls per minute + def run(self, task: str, img: str): + """ + Run the GPT-4 Vision model + + Task: str + The task to run + Img: str + The image to run the task on + + """ + if self.dashboard: + self.print_dashboard() + try: + response = self.client.chat.completions.create( + model="gpt-4-vision-preview", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": task}, + { + "type": "image_url", + "image_url": { + "url": str(img), + }, + }, + ], + } + ], + max_tokens=self.max_tokens, + ) + + out = print(response.choices[0]) + # out = self.clean_output(out) + return out + except openai.OpenAIError as e: + # logger.error(f"OpenAI API error: {e}") + return f"OpenAI API error: Could not process the image. {e}" + except Exception as e: + return f"Unexpected error occurred while processing the image. {e}" + + def clean_output(self, output: str): + # Regex pattern to find the Choice object representation in the output + pattern = r"Choice\(.*?\(content=\"(.*?)\".*?\)\)" + match = re.search(pattern, output, re.DOTALL) + + if match: + # Extract the content from the matched pattern + content = match.group(1) + # Replace escaped quotes to get the clean content + content = content.replace(r"\"", '"') + print(content) + else: + print("No content found in the output.") + + async def arun(self, task: str, img: str): + """ + Arun is an async version of run + + Task: str + The task to run + Img: str + The image to run the task on + + """ + try: + response = await self.client.chat.completions.create( + model="gpt-4-vision-preview", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": task}, + { + "type": "image_url", + "image_url": { + "url": img, + }, + }, + ], + } + ], + max_tokens=self.max_tokens, + ) + + return print(response.choices[0]) + except openai.OpenAIError as e: + # logger.error(f"OpenAI API error: {e}") + return f"OpenAI API error: Could not process the image. {e}" + except Exception as e: + return f"Unexpected error occurred while processing the image. 
{e}" + + def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]: + """Process a batch of tasks and images""" + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [ + executor.submit(self.run, task, img) for task, img in tasks_images + ] + results = [future.result() for future in futures] + return results + + async def run_batch_async(self, tasks_images: List[Tuple[str, str]]) -> List[str]: + """Process a batch of tasks and images asynchronously""" + loop = asyncio.get_event_loop() + futures = [ + loop.run_in_executor(None, self.run, task, img) + for task, img in tasks_images + ] + return await asyncio.gather(*futures) + + async def run_batch_async_with_retries( + self, tasks_images: List[Tuple[str, str]] + ) -> List[str]: + """Process a batch of tasks and images asynchronously with retries""" + loop = asyncio.get_event_loop() + futures = [ + loop.run_in_executor(None, self.run_with_retries, task, img) + for task, img in tasks_images + ] + return await asyncio.gather(*futures) + + def print_dashboard(self): + dashboard = print( + colored( + f""" + GPT4Vision Dashboard + ------------------- + Max Retries: {self.max_retries} + Model: {self.model} + Backoff Factor: {self.backoff_factor} + Timeout Seconds: {self.timeout_seconds} + Image Quality: {self.quality} + Max Tokens: {self.max_tokens} + + """, + "green", + ) + ) + return dashboard + + def health_check(self): + """Health check for the GPT4Vision model""" + try: + response = requests.get("https://api.openai.com/v1/engines") + return response.status_code == 200 + except requests.RequestException as error: + print(f"Health check failed: {error}") + return False + + def sanitize_input(self, text: str) -> str: + """ + Sanitize input to prevent injection attacks. + + Parameters: + text: str - The input text to be sanitized. + + Returns: + The sanitized text. 
+ """ + # Example of simple sanitization, this should be expanded based on the context and usage + sanitized_text = re.sub(r"[^\w\s]", "", text) + return sanitized_text diff --git a/swarms/models/huggingface.py b/swarms/models/huggingface.py index 437d9144..94cbf27e 100644 --- a/swarms/models/huggingface.py +++ b/swarms/models/huggingface.py @@ -4,6 +4,7 @@ import torch from torch.nn.parallel import DistributedDataParallel as DDP from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig from termcolor import colored +from termcolor import colored class HuggingfaceLLM: @@ -23,7 +24,7 @@ class HuggingfaceLLM: ``` from swarms.models import HuggingfaceLLM - model_id = "gpt2-small" + model_id = "NousResearch/Yarn-Mistral-7b-128k" inference = HuggingfaceLLM(model_id=model_id) task = "Once upon a time" @@ -45,6 +46,8 @@ class HuggingfaceLLM: decoding=False, *args, **kwargs, + *args, + **kwargs, ): self.logger = logging.getLogger(__name__) self.device = ( @@ -74,15 +77,22 @@ class HuggingfaceLLM: bnb_config = BitsAndBytesConfig(**quantization_config) try: - self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) + self.tokenizer = AutoTokenizer.from_pretrained( + self.model_id, *args, **kwargs + ) self.model = AutoModelForCausalLM.from_pretrained( - self.model_id, quantization_config=bnb_config + self.model_id, quantization_config=bnb_config, *args, **kwargs ) self.model # .to(self.device) except Exception as e: - self.logger.error(f"Failed to load the model or the tokenizer: {e}") - raise + # self.logger.error(f"Failed to load the model or the tokenizer: {e}") + # raise + print(colored(f"Failed to load the model and or the tokenizer: {e}", "red")) + + def print_error(self, error: str): + """Print error""" + print(colored(f"Error: {error}", "red")) def load_model(self): """Load the model""" @@ -106,12 +116,14 @@ class HuggingfaceLLM: self.logger.error(f"Failed to load the model or the tokenizer: {error}") raise + def run(self, task: str): def run(self, task: str): """ Generate a response based on the prompt text. Args: - task (str): Text to prompt the model. + - task (str): Text to prompt the model. - max_length (int): Maximum length of the response. Returns: @@ -123,8 +135,11 @@ class HuggingfaceLLM: self.print_dashboard(task) + self.print_dashboard(task) + try: inputs = self.tokenizer.encode(task, return_tensors="pt").to(self.device) + inputs = self.tokenizer.encode(task, return_tensors="pt").to(self.device) # self.log.start() @@ -157,7 +172,15 @@ class HuggingfaceLLM: del inputs return self.tokenizer.decode(outputs[0], skip_special_tokens=True) except Exception as e: - self.logger.error(f"Failed to generate the text: {e}") + print( + colored( + ( + f"HuggingfaceLLM could not generate text because of error: {e}," + " try optimizing your arguments" + ), + "red", + ) + ) raise async def run_async(self, task: str, *args, **kwargs) -> str: @@ -183,12 +206,14 @@ class HuggingfaceLLM: # Wrapping synchronous calls with async return self.run(task, *args, **kwargs) + def __call__(self, task: str): def __call__(self, task: str): """ Generate a response based on the prompt text. Args: - task (str): Text to prompt the model. + - task (str): Text to prompt the model. - max_length (int): Maximum length of the response. 
Returns: @@ -198,10 +223,14 @@ class HuggingfaceLLM: max_length = self.max_length + self.print_dashboard(task) + max_length = self.max_length + self.print_dashboard(task) try: inputs = self.tokenizer.encode(task, return_tensors="pt").to(self.device) + inputs = self.tokenizer.encode(task, return_tensors="pt").to(self.device) # self.log.start() @@ -314,3 +343,57 @@ class HuggingfaceLLM: def clear_chat_history(self): """Clear chat history""" self.chat_history = [] + + def print_dashboard(self, task: str): + """Print dashboard""" + + dashboard = print( + colored( + f""" + HuggingfaceLLM Dashboard + -------------------------------------------- + Model Name: {self.model_id} + Tokenizer: {self.tokenizer} + Model MaxLength: {self.max_length} + Model Device: {self.device} + Model Quantization: {self.quantize} + Model Quantization Config: {self.quantization_config} + Model Verbose: {self.verbose} + Model Distributed: {self.distributed} + Model Decoding: {self.decoding} + + ---------------------------------------- + Metadata: + Task Memory Consumption: {self.memory_consumption()} + GPU Available: {self.gpu_available()} + ---------------------------------------- + + Task Environment: + Task: {task} + + """, + "red", + ) + ) + + print(dashboard) + + def set_device(self, device): + """ + Changes the device used for inference. + + Parameters + ---------- + device : str + The new device to use for inference. + """ + self.device = device + self.model.to(self.device) + + def set_max_length(self, max_length): + """Set max_length""" + self.max_length = max_length + + def clear_chat_history(self): + """Clear chat history""" + self.chat_history = [] diff --git a/swarms/models/kosmos2.py b/swarms/models/kosmos2.py new file mode 100644 index 00000000..12d5638a --- /dev/null +++ b/swarms/models/kosmos2.py @@ -0,0 +1,100 @@ +from typing import List, Tuple + +import numpy as np +from PIL import Image +from pydantic import BaseModel, root_validator, validator +from transformers import AutoModelForVision2Seq, AutoProcessor + + +# Assuming the Detections class represents the output of the model prediction +class Detections(BaseModel): + xyxy: List[Tuple[float, float, float, float]] + class_id: List[int] + confidence: List[float] + + @root_validator + def check_length(cls, values): + assert ( + len(values.get("xyxy")) + == len(values.get("class_id")) + == len(values.get("confidence")) + ), "All fields must have the same length." 
+ return values + + @validator("xyxy", "class_id", "confidence", pre=True, each_item=True) + def check_not_empty(cls, v): + if isinstance(v, list) and len(v) == 0: + raise ValueError("List must not be empty") + return v + + @classmethod + def empty(cls): + return cls(xyxy=[], class_id=[], confidence=[]) + + +class Kosmos2(BaseModel): + model: AutoModelForVision2Seq + processor: AutoProcessor + + @classmethod + def initialize(cls): + model = AutoModelForVision2Seq.from_pretrained( + "ydshieh/kosmos-2-patch14-224", trust_remote_code=True + ) + processor = AutoProcessor.from_pretrained( + "ydshieh/kosmos-2-patch14-224", trust_remote_code=True + ) + return cls(model=model, processor=processor) + + def __call__(self, img: str) -> Detections: + image = Image.open(img) + prompt = "An image of" + + inputs = self.processor(text=prompt, images=image, return_tensors="pt") + outputs = self.model.generate(**inputs, use_cache=True, max_new_tokens=64) + + generated_text = self.processor.batch_decode(outputs, skip_special_tokens=True)[ + 0 + ] + + # The actual processing of generated_text to entities would go here + # For the purpose of this example, assume a mock function 'extract_entities' exists: + entities = self.extract_entities(generated_text) + + # Convert entities to detections format + detections = self.process_entities_to_detections(entities, image) + return detections + + def extract_entities( + self, text: str + ) -> List[Tuple[str, Tuple[float, float, float, float]]]: + # Placeholder function for entity extraction + # This should be replaced with the actual method of extracting entities + return [] + + def process_entities_to_detections( + self, + entities: List[Tuple[str, Tuple[float, float, float, float]]], + image: Image.Image, + ) -> Detections: + if not entities: + return Detections.empty() + + class_ids = [0] * len(entities) # Replace with actual class ID extraction logic + xyxys = [ + ( + e[1][0] * image.width, + e[1][1] * image.height, + e[1][2] * image.width, + e[1][3] * image.height, + ) + for e in entities + ] + confidences = [1.0] * len(entities) # Placeholder confidence + + return Detections(xyxy=xyxys, class_id=class_ids, confidence=confidences) + + +# Usage: +# kosmos2 = Kosmos2.initialize() +# detections = kosmos2(img="path_to_image.jpg") diff --git a/swarms/models/kosmos_two.py b/swarms/models/kosmos_two.py index eee834f3..596886f3 100644 --- a/swarms/models/kosmos_two.py +++ b/swarms/models/kosmos_two.py @@ -106,7 +106,10 @@ class Kosmos: self.run(prompt, image_url) def referring_expression_generation(self, phrase, image_url): - prompt = " It is" + prompt = ( + "" + " It is" + ) self.run(prompt, image_url) def grounded_vqa(self, question, image_url): diff --git a/swarms/models/layoutlm_document_qa.py b/swarms/models/layoutlm_document_qa.py index 6fe83210..e2b8d1e4 100644 --- a/swarms/models/layoutlm_document_qa.py +++ b/swarms/models/layoutlm_document_qa.py @@ -3,10 +3,9 @@ LayoutLMDocumentQA is a multimodal good for visual question answering on real world docs lik invoice, pdfs, etc """ from transformers import pipeline -from swarms.models.base import AbstractModel -class LayoutLMDocumentQA(AbstractModel): +class LayoutLMDocumentQA: """ LayoutLMDocumentQA for document question answering: @@ -25,9 +24,9 @@ class LayoutLMDocumentQA(AbstractModel): def __init__( self, model_name: str = "impira/layoutlm-document-qa", - task: str = "document-question-answering", + task_type: str = "document-question-answering", ): - self.pipeline = pipeline(self.task, model=self.model_name) + 
self.pipeline = pipeline(self.task_type, model=self.model_name) def __call__(self, task: str, img_path: str): """Call for model""" diff --git a/swarms/models/nougat.py b/swarms/models/nougat.py index cc154283..f156981c 100644 --- a/swarms/models/nougat.py +++ b/swarms/models/nougat.py @@ -8,7 +8,7 @@ format - Extracting metadata from pdfs """ - +import re import torch from PIL import Image from transformers import NougatProcessor, VisionEncoderDecoderModel @@ -61,9 +61,28 @@ class Nougat: pixel_values.to(self.device), min_length=self.min_length, max_new_tokens=self.max_new_tokens, - bad_words_ids=[[self.processor.unk_token - id]], ) sequence = self.processor.batch_decode(outputs, skip_special_tokens=True)[0] sequence = self.processor.post_process_generation(sequence, fix_markdown=False) - return sequence + + out = print(sequence) + return out + + def clean_nougat_output(raw_output): + # Define the pattern to extract the relevant data + daily_balance_pattern = ( + r"\*\*(\d{2}/\d{2}/\d{4})\*\*\n\n\*\*([\d,]+\.\d{2})\*\*" + ) + + # Find all matches of the pattern + matches = re.findall(daily_balance_pattern, raw_output) + + # Convert the matches to a readable format + cleaned_data = [ + "Date: {}, Amount: {}".format(date, amount.replace(",", "")) + for date, amount in matches + ] + + # Join the cleaned data with new lines for readability + return "\n".join(cleaned_data) diff --git a/swarms/models/openai_assistant.py b/swarms/models/openai_assistant.py new file mode 100644 index 00000000..6d0c518f --- /dev/null +++ b/swarms/models/openai_assistant.py @@ -0,0 +1,74 @@ +from typing import Dict, List, Optional +from dataclass import dataclass + +from swarms.models import OpenAI + + +@dataclass +class OpenAIAssistant: + name: str = "OpenAI Assistant" + instructions: str = None + tools: List[Dict] = None + model: str = None + openai_api_key: str = None + temperature: float = 0.5 + max_tokens: int = 100 + stop: List[str] = None + echo: bool = False + stream: bool = False + log: bool = False + presence: bool = False + dashboard: bool = False + debug: bool = False + max_loops: int = 5 + stopping_condition: Optional[str] = None + loop_interval: int = 1 + retry_attempts: int = 3 + retry_interval: int = 1 + interactive: bool = False + dynamic_temperature: bool = False + state: Dict = None + response_filters: List = None + response_filter: Dict = None + response_filter_name: str = None + response_filter_value: str = None + response_filter_type: str = None + response_filter_action: str = None + response_filter_action_value: str = None + response_filter_action_type: str = None + response_filter_action_name: str = None + client = OpenAI() + role: str = "user" + instructions: str = None + + def create_assistant(self, task: str): + assistant = self.client.create_assistant( + name=self.name, + instructions=self.instructions, + tools=self.tools, + model=self.model, + ) + return assistant + + def create_thread(self): + thread = self.client.beta.threads.create() + return thread + + def add_message_to_thread(self, thread_id: str, message: str): + message = self.client.beta.threads.add_message( + thread_id=thread_id, role=self.user, content=message + ) + return message + + def run(self, task: str): + run = self.client.beta.threads.runs.create( + thread_id=self.create_thread().id, + assistant_id=self.create_assistant().id, + instructions=self.instructions, + ) + + out = self.client.beta.threads.runs.retrieve( + thread_id=run.thread_id, run_id=run.id + ) + + return out diff --git a/swarms/models/openai_embeddings.py 
b/swarms/models/openai_embeddings.py index 0aa3473d..81dea550 100644 --- a/swarms/models/openai_embeddings.py +++ b/swarms/models/openai_embeddings.py @@ -233,7 +233,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): if invalid_model_kwargs: raise ValueError( f"Parameters {invalid_model_kwargs} should be specified explicitly. " - f"Instead they were passed in as part of `model_kwargs` parameter." + "Instead they were passed in as part of `model_kwargs` parameter." ) values["model_kwargs"] = extra diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py index db030a71..4b0cc91d 100644 --- a/swarms/models/openai_models.py +++ b/swarms/models/openai_models.py @@ -500,7 +500,10 @@ class BaseOpenAI(BaseLLM): if self.openai_proxy: import openai - openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy} # type: ignore[assignment] # noqa: E501 + openai.proxy = { + "http": self.openai_proxy, + "https": self.openai_proxy, + } # type: ignore[assignment] # noqa: E501 return {**openai_creds, **self._default_params} @property @@ -785,7 +788,10 @@ class OpenAIChat(BaseLLM): if openai_organization: openai.organization = openai_organization if openai_proxy: - openai.proxy = {"http": openai_proxy, "https": openai_proxy} # type: ignore[assignment] # noqa: E501 + openai.proxy = { + "http": openai_proxy, + "https": openai_proxy, + } # type: ignore[assignment] # noqa: E501 except ImportError: raise ImportError( "Could not import openai python package. " diff --git a/swarms/models/openai_tokenizer.py b/swarms/models/openai_tokenizer.py new file mode 100644 index 00000000..9ff1fa08 --- /dev/null +++ b/swarms/models/openai_tokenizer.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import Optional + +import tiktoken +from attr import Factory, define, field + + +@define(frozen=True) +class BaseTokenizer(ABC): + DEFAULT_STOP_SEQUENCES = ["Observation:"] + + stop_sequences: list[str] = field( + default=Factory(lambda: BaseTokenizer.DEFAULT_STOP_SEQUENCES), + kw_only=True, + ) + + @property + @abstractmethod + def max_tokens(self) -> int: + ... + + def count_tokens_left(self, text: str) -> int: + diff = self.max_tokens - self.count_tokens(text) + + if diff > 0: + return diff + else: + return 0 + + @abstractmethod + def count_tokens(self, text: str) -> int: + ... 
+ + +@define(frozen=True) +class OpenAITokenizer(BaseTokenizer): + DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL = "text-davinci-003" + DEFAULT_OPENAI_GPT_3_CHAT_MODEL = "gpt-3.5-turbo" + DEFAULT_OPENAI_GPT_4_MODEL = "gpt-4" + DEFAULT_ENCODING = "cl100k_base" + DEFAULT_MAX_TOKENS = 2049 + TOKEN_OFFSET = 8 + + MODEL_PREFIXES_TO_MAX_TOKENS = { + "gpt-4-32k": 32768, + "gpt-4": 8192, + "gpt-3.5-turbo-16k": 16384, + "gpt-3.5-turbo": 4096, + "gpt-35-turbo-16k": 16384, + "gpt-35-turbo": 4096, + "text-davinci-003": 4097, + "text-davinci-002": 4097, + "code-davinci-002": 8001, + "text-embedding-ada-002": 8191, + "text-embedding-ada-001": 2046, + } + + EMBEDDING_MODELS = ["text-embedding-ada-002", "text-embedding-ada-001"] + + model: str = field(kw_only=True) + + @property + def encoding(self) -> tiktoken.Encoding: + try: + return tiktoken.encoding_for_model(self.model) + except KeyError: + return tiktoken.get_encoding(self.DEFAULT_ENCODING) + + @property + def max_tokens(self) -> int: + tokens = next( + v + for k, v in self.MODEL_PREFIXES_TO_MAX_TOKENS.items() + if self.model.startswith(k) + ) + offset = 0 if self.model in self.EMBEDDING_MODELS else self.TOKEN_OFFSET + + return (tokens if tokens else self.DEFAULT_MAX_TOKENS) - offset + + def count_tokens(self, text: str | list, model: Optional[str] = None) -> int: + """ + Handles the special case of ChatML. Implementation adopted from the official OpenAI notebook: + https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb + """ + if isinstance(text, list): + model = model if model else self.model + + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + logging.warning("model not found. Using cl100k_base encoding.") + + encoding = tiktoken.get_encoding("cl100k_base") + + if model in { + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + "gpt-4-0314", + "gpt-4-32k-0314", + "gpt-4-0613", + "gpt-4-32k-0613", + }: + tokens_per_message = 3 + tokens_per_name = 1 + elif model == "gpt-3.5-turbo-0301": + # every message follows <|start|>{role/name}\n{content}<|end|>\n + tokens_per_message = 4 + # if there's a name, the role is omitted + tokens_per_name = -1 + elif "gpt-3.5-turbo" in model or "gpt-35-turbo" in model: + logging.info( + "gpt-3.5-turbo may update over time. Returning num tokens assuming" + " gpt-3.5-turbo-0613." + ) + return self.count_tokens(text, model="gpt-3.5-turbo-0613") + elif "gpt-4" in model: + logging.info( + "gpt-4 may update over time. Returning num tokens assuming" + " gpt-4-0613." + ) + return self.count_tokens(text, model="gpt-4-0613") + else: + raise NotImplementedError( + f"""token_count() is not implemented for model {model}. 
+ See https://github.com/openai/openai-python/blob/main/chatml.md for + information on how messages are converted to tokens.""" + ) + + num_tokens = 0 + + for message in text: + num_tokens += tokens_per_message + for key, value in message.items(): + num_tokens += len(encoding.encode(value)) + if key == "name": + num_tokens += tokens_per_name + + # every reply is primed with <|start|>assistant<|message|> + num_tokens += 3 + + return num_tokens + else: + return len( + self.encoding.encode(text, allowed_special=set(self.stop_sequences)) + ) diff --git a/swarms/models/timm.py b/swarms/models/timm.py new file mode 100644 index 00000000..5d9b965a --- /dev/null +++ b/swarms/models/timm.py @@ -0,0 +1,67 @@ +from typing import List + +import timm +import torch +from pydantic import BaseModel, conlist + + +class TimmModelInfo(BaseModel): + model_name: str + pretrained: bool + in_chans: int + + class Config: + # Use strict typing for all fields + strict = True + + +class TimmModel: + """ + + # Usage + model_handler = TimmModelHandler() + model_info = TimmModelInfo(model_name='resnet34', pretrained=True, in_chans=1) + input_tensor = torch.randn(1, 1, 224, 224) + output_shape = model_handler(model_info=model_info, input_tensor=input_tensor) + print(output_shape) + + """ + + def __init__(self): + self.models = self._get_supported_models() + + def _get_supported_models(self) -> List[str]: + """Retrieve the list of supported models from timm.""" + return timm.list_models() + + def _create_model(self, model_info: TimmModelInfo) -> torch.nn.Module: + """ + Create a model instance from timm with specified parameters. + + Args: + model_info: An instance of TimmModelInfo containing model specifications. + + Returns: + An instance of a pytorch model. + """ + return timm.create_model( + model_info.model_name, + pretrained=model_info.pretrained, + in_chans=model_info.in_chans, + ) + + def __call__( + self, model_info: TimmModelInfo, input_tensor: torch.Tensor + ) -> torch.Size: + """ + Create and run a model specified by `model_info` on `input_tensor`. + + Args: + model_info: An instance of TimmModelInfo containing model specifications. + input_tensor: A torch tensor representing the input data. + + Returns: + The shape of the output from the model. 
+ """ + model = self._create_model(model_info) + return model(input_tensor).shape diff --git a/swarms/models/zephyr.py b/swarms/models/zephyr.py index 582bc740..4fca5211 100644 --- a/swarms/models/zephyr.py +++ b/swarms/models/zephyr.py @@ -25,38 +25,80 @@ class Zephyr: def __init__( self, + model_name: str = "HuggingFaceH4/zephyr-7b-alpha", + tokenize: bool = False, + add_generation_prompt: bool = True, + system_prompt: str = "You are a friendly chatbot who always responds in the style of a pirate", max_new_tokens: int = 300, temperature: float = 0.5, top_k: float = 50, top_p: float = 0.95, + do_sample: bool = True, + *args, + **kwargs, ): super().__init__() + self.model_name = model_name + self.tokenize = tokenize + self.add_generation_prompt = add_generation_prompt + self.system_prompt = system_prompt self.max_new_tokens = max_new_tokens self.temperature = temperature self.top_k = top_k self.top_p = top_p + self.do_sample = do_sample self.pipe = pipeline( "text-generation", - model="HuggingFaceH4/zephyr-7b-alpha", - torch_dtype=torch.bfloa16, + model=self.model_name, + torch_dtype=torch.bfloat16, device_map="auto", ) self.messages = [ { "role": "system", - "content": "You are a friendly chatbot who always responds in the style of a pirate", - }, - { - "role": "user", - "content": "How many helicopters can a human eat in one sitting?", + "content": f"{self.system_prompt}\n\nUser:", }, ] - def __call__(self, text: str): + def __call__(self, task: str): """Call the model""" prompt = self.pipe.tokenizer.apply_chat_template( - self.messages, tokenize=False, add_generation_prompt=True + self.messages, + tokenize=self.tokenize, + add_generation_prompt=self.add_generation_prompt, + ) + outputs = self.pipe(prompt) # max_new_token=self.max_new_tokens) + print(outputs[0]["generated_text"]) + + def chat(self, message: str): + """ + Adds a user message to the conversation and generates a chatbot response. 
+ """ + # Add the user message to the conversation + self.messages.append({"role": "user", "content": message}) + + # Apply the chat template to format the messages + prompt = self.pipe.tokenizer.apply_chat_template( + self.messages, + tokenize=self.tokenize, + add_generation_prompt=self.add_generation_prompt, + ) + + # Generate a response + outputs = self.pipe( + prompt, + max_new_tokens=self.max_new_tokens, + do_sample=self.do_sample, + temperature=self.temperature, + top_k=self.top_k, + top_p=self.top_p, ) - outputs = self.pipe(prompt, max_new_token=self.max_new_tokens) - print(outputs[0])["generated_text"] + + # Extract the generated text + generated_text = outputs[0]["generated_text"] + + # Optionally, you could also add the chatbot's response to the messages list + # However, the below line should be adjusted to extract the chatbot's response only + # self.messages.append({"role": "bot", "content": generated_text}) + return generated_text diff --git a/swarms/prompts/agent_prompt.py b/swarms/prompts/agent_prompt.py index 747b7949..c4897193 100644 --- a/swarms/prompts/agent_prompt.py +++ b/swarms/prompts/agent_prompt.py @@ -70,9 +70,9 @@ class PromptGenerator: f"Commands:\n{''.join(self.commands)}\n\n" f"Resources:\n{''.join(self.resources)}\n\n" f"Performance Evaluation:\n{''.join(self.performance_evaluation)}\n\n" - f"You should only respond in JSON format as described below " + "You should only respond in JSON format as described below " f"\nResponse Format: \n{formatted_response_format} " - f"\nEnsure the response can be parsed by Python json.loads" + "\nEnsure the response can be parsed by Python json.loads" ) return prompt_string diff --git a/swarms/prompts/agent_prompts.py b/swarms/prompts/agent_prompts.py index 350545ff..8d145fc0 100644 --- a/swarms/prompts/agent_prompts.py +++ b/swarms/prompts/agent_prompts.py @@ -4,10 +4,28 @@ def generate_agent_role_prompt(agent): Returns: str: The agent role prompt. """ prompts = { - "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.", - "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.", - "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.", - "Default Agent": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text.", + "Finance Agent": ( + "You are a seasoned finance analyst AI assistant. Your primary goal is to" + " compose comprehensive, astute, impartial, and methodically arranged" + " financial reports based on provided data and trends." + ), + "Travel Agent": ( + "You are a world-travelled AI tour guide assistant. Your main purpose is to" + " draft engaging, insightful, unbiased, and well-structured travel reports" + " on given locations, including history, attractions, and cultural" + " insights." + ), + "Academic Research Agent": ( + "You are an AI academic research assistant. 
Your primary responsibility is" + " to create thorough, academically rigorous, unbiased, and systematically" + " organized reports on a given research topic, following the standards of" + " scholarly work." + ), + "Default Agent": ( + "You are an AI critical thinker research assistant. Your sole purpose is to" + " write well written, critically acclaimed, objective and structured" + " reports on given text." + ), } return prompts.get(agent, "No such agent") @@ -22,10 +40,11 @@ def generate_report_prompt(question, research_summary): return ( f'"""{research_summary}""" Using the above information, answer the following' - f' question or topic: "{question}" in a detailed report --' - " The report should focus on the answer to the question, should be well structured, informative," - " in depth, with facts and numbers if available, a minimum of 1,200 words and with markdown syntax and apa format. " - "Write all source urls at the end of the report in apa format" + f' question or topic: "{question}" in a detailed report -- The report should' + " focus on the answer to the question, should be well structured, informative," + " in depth, with facts and numbers if available, a minimum of 1,200 words and" + " with markdown syntax and apa format. Write all source urls at the end of the" + " report in apa format" ) @@ -36,8 +55,9 @@ def generate_search_queries_prompt(question): """ return ( - f'Write 4 google search queries to search online that form an objective opinion from the following: "{question}"' - f'You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3", "query 4"]' + "Write 4 google search queries to search online that form an objective opinion" + f' from the following: "{question}"You must respond with a list of strings in' + ' the following format: ["query 1", "query 2", "query 3", "query 4"]' ) @@ -52,13 +72,15 @@ def generate_resource_report_prompt(question, research_summary): str: The resource report prompt for the given question and research summary. """ return ( - f'"""{research_summary}""" Based on the above information, generate a bibliography recommendation report for the following' - f' question or topic: "{question}". The report should provide a detailed analysis of each recommended resource,' - " explaining how each source can contribute to finding answers to the research question." - " Focus on the relevance, reliability, and significance of each source." - " Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax." - " Include relevant facts, figures, and numbers whenever available." - " The report should have a minimum length of 1,200 words." + f'"""{research_summary}""" Based on the above information, generate a' + " bibliography recommendation report for the following question or topic:" + f' "{question}". The report should provide a detailed analysis of each' + " recommended resource, explaining how each source can contribute to finding" + " answers to the research question. Focus on the relevance, reliability, and" + " significance of each source. Ensure that the report is well-structured," + " informative, in-depth, and follows Markdown syntax. Include relevant facts," + " figures, and numbers whenever available. The report should have a minimum" + " length of 1,200 words." 
) @@ -70,11 +92,13 @@ def generate_outline_report_prompt(question, research_summary): """ return ( - f'"""{research_summary}""" Using the above information, generate an outline for a research report in Markdown syntax' - f' for the following question or topic: "{question}". The outline should provide a well-structured framework' - " for the research report, including the main sections, subsections, and key points to be covered." - " The research report should be detailed, informative, in-depth, and a minimum of 1,200 words." - " Use appropriate Markdown syntax to format the outline and ensure readability." + f'"""{research_summary}""" Using the above information, generate an outline for' + " a research report in Markdown syntax for the following question or topic:" + f' "{question}". The outline should provide a well-structured framework for the' + " research report, including the main sections, subsections, and key points to" + " be covered. The research report should be detailed, informative, in-depth," + " and a minimum of 1,200 words. Use appropriate Markdown syntax to format the" + " outline and ensure readability." ) @@ -86,9 +110,11 @@ def generate_concepts_prompt(question, research_summary): """ return ( - f'"""{research_summary}""" Using the above information, generate a list of 5 main concepts to learn for a research report' - f' on the following question or topic: "{question}". The outline should provide a well-structured framework' - 'You must respond with a list of strings in the following format: ["concepts 1", "concepts 2", "concepts 3", "concepts 4, concepts 5"]' + f'"""{research_summary}""" Using the above information, generate a list of 5' + " main concepts to learn for a research report on the following question or" + f' topic: "{question}". The outline should provide a well-structured' + " frameworkYou must respond with a list of strings in the following format:" + ' ["concepts 1", "concepts 2", "concepts 3", "concepts 4, concepts 5"]' ) @@ -102,9 +128,10 @@ def generate_lesson_prompt(concept): """ prompt = ( - f"generate a comprehensive lesson about {concept} in Markdown syntax. This should include the definition" - f"of {concept}, its historical background and development, its applications or uses in different" - f"fields, and notable events or facts related to {concept}." + f"generate a comprehensive lesson about {concept} in Markdown syntax. This" + f" should include the definitionof {concept}, its historical background and" + " development, its applications or uses in differentfields, and notable events" + f" or facts related to {concept}." ) return prompt diff --git a/swarms/prompts/growth_agent_prompt.py b/swarms/prompts/growth_agent_prompt.py index 9ac74a06..117148d9 100644 --- a/swarms/prompts/growth_agent_prompt.py +++ b/swarms/prompts/growth_agent_prompt.py @@ -46,47 +46,47 @@ Growth Agent is a dynamic fusion of digital marketing, content creation, and cus - **3.1 Data Assimilation and Interpretation** - *3.1.1* Efficiently process vast volumes of data using state-of-the-art algorithms. - + - *3.1.2* Identify key patterns, trends, and anomalies to derive actionable insights. - + - *3.1.3* Use these insights to predict future trends and user behaviors. - **3.2 Ad Generation** - *3.2.1* Leverage Generative Adversarial Networks (GANs) to craft engaging ads. - + - *3.2.2* Implement A/B testing mechanisms to select high-performing ads. - + - *3.2.3* Continuously refine ad generation based on user feedback and interactions. 
- **3.3 Website Creation and Optimization** - *3.3.1* Use responsive design principles for accessibility across devices. - + - *3.3.2* Integrate user tracking tools to gain insights into navigation patterns. - + - *3.3.3* Leverage AI-driven chatbots and interactive elements to improve user engagement and retention. - **3.4 Messaging Sequences** - *3.4.1* Craft sequences tailored to individual user behaviors and interactions. - + - *3.4.2* Harness advanced Natural Language Processing (NLP) tools for optimal communication. - + - *3.4.3* Periodically update sequences based on user feedback and evolving market trends. - **3.5 Systematic Growth and Enhancement** - *3.5.1* Implement reinforcement learning for real-time adaptation and strategy refinement. - + - *3.5.2* Engage in regular feedback loops with users to understand needs and pain points. - + - *3.5.3* Benchmark performance against industry leaders to identify areas of improvement. - **3.6 Integration and Collaboration** - *3.6.1* Seamlessly integrate with other digital platforms and tools. - + - *3.6.2* Collaborate with other AI models or systems to harness collective intelligence. --- @@ -96,9 +96,9 @@ Growth Agent is a dynamic fusion of digital marketing, content creation, and cus Achieving world-class expertise is a journey, not a destination. Ensure: - **4.1** Regular system diagnostics and optimization checks. - + - **4.2** Inclusion of emerging platforms and technologies into the learning paradigm. - + - **4.3** Frequent benchmarking against top industry standards. --- diff --git a/swarms/prompts/multi_modal_prompts.py b/swarms/prompts/multi_modal_prompts.py index 9165eb3e..b552b68d 100644 --- a/swarms/prompts/multi_modal_prompts.py +++ b/swarms/prompts/multi_modal_prompts.py @@ -1,4 +1,7 @@ -ERROR_PROMPT = "An error has occurred for the following text: \n{promptedQuery} Please explain this error.\n {e}" +ERROR_PROMPT = ( + "An error has occurred for the following text: \n{promptedQuery} Please explain" + " this error.\n {e}" +) IMAGE_PROMPT = """ provide a figure named {filename}. The description is: {description}. @@ -9,7 +12,6 @@ USER INPUT ============ """ - AUDIO_PROMPT = """ provide a audio named {filename}. The description is: {description}. @@ -38,7 +40,6 @@ USER INPUT ============ """ - EVAL_PREFIX = """{bot_name} can execute any user's request. {bot_name} has permission to handle one instance and can handle the environment in it at will. diff --git a/swarms/prompts/python.py b/swarms/prompts/python.py index 6ddda6ae..9d1f4a1e 100644 --- a/swarms/prompts/python.py +++ b/swarms/prompts/python.py @@ -1,12 +1,43 @@ PY_SIMPLE_COMPLETION_INSTRUCTION = "# Write the body of this function only." -PY_REFLEXION_COMPLETION_INSTRUCTION = "You are a Python writing assistant. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Write your full implementation (restate the function signature).\n\n-----" -PY_SELF_REFLECTION_COMPLETION_INSTRUCTION = "You are a Python writing assistant. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. Only provide the few sentence description in your answer, not the implementation.\n\n-----" -USE_PYTHON_CODEBLOCK_INSTRUCTION = "Use a Python code block to write your response. 
For example:\n```python\nprint('Hello world!')\n```" - -PY_SIMPLE_CHAT_INSTRUCTION = "You are an AI that only responds with python code, NOT ENGLISH. You will be given a function signature and its docstring by the user. Write your full implementation (restate the function signature)." -PY_SIMPLE_CHAT_INSTRUCTION_V2 = "You are an AI that only responds with only python code. You will be given a function signature and its docstring by the user. Write your full implementation (restate the function signature)." -PY_REFLEXION_CHAT_INSTRUCTION = "You are an AI Python assistant. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Write your full implementation (restate the function signature)." -PY_REFLEXION_CHAT_INSTRUCTION_V2 = "You are an AI Python assistant. You will be given your previous implementation of a function, a series of unit tests results, and your self-reflection on your previous implementation. Write your full implementation (restate the function signature)." +PY_REFLEXION_COMPLETION_INSTRUCTION = ( + "You are a Python writing assistant. You will be given your past function" + " implementation, a series of unit tests, and a hint to change the implementation" + " appropriately. Write your full implementation (restate the function" + " signature).\n\n-----" +) +PY_SELF_REFLECTION_COMPLETION_INSTRUCTION = ( + "You are a Python writing assistant. You will be given a function implementation" + " and a series of unit tests. Your goal is to write a few sentences to explain why" + " your implementation is wrong as indicated by the tests. You will need this as a" + " hint when you try again later. Only provide the few sentence description in your" + " answer, not the implementation.\n\n-----" +) +USE_PYTHON_CODEBLOCK_INSTRUCTION = ( + "Use a Python code block to write your response. For" + " example:\n```python\nprint('Hello world!')\n```" +) + +PY_SIMPLE_CHAT_INSTRUCTION = ( + "You are an AI that only responds with python code, NOT ENGLISH. You will be given" + " a function signature and its docstring by the user. Write your full" + " implementation (restate the function signature)." +) +PY_SIMPLE_CHAT_INSTRUCTION_V2 = ( + "You are an AI that only responds with only python code. You will be given a" + " function signature and its docstring by the user. Write your full implementation" + " (restate the function signature)." +) +PY_REFLEXION_CHAT_INSTRUCTION = ( + "You are an AI Python assistant. You will be given your past function" + " implementation, a series of unit tests, and a hint to change the implementation" + " appropriately. Write your full implementation (restate the function signature)." +) +PY_REFLEXION_CHAT_INSTRUCTION_V2 = ( + "You are an AI Python assistant. You will be given your previous implementation of" + " a function, a series of unit tests results, and your self-reflection on your" + " previous implementation. Write your full implementation (restate the function" + " signature)." +) PY_REFLEXION_FEW_SHOT_ADD = '''Example 1: [previous impl]: ```python @@ -139,8 +170,21 @@ def fullJustify(words: List[str], maxWidth: int) -> List[str]: END EXAMPLES ''' -PY_SELF_REFLECTION_CHAT_INSTRUCTION = "You are a Python programming assistant. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. 
Only provide the few sentence description in your answer, not the implementation." -PY_SELF_REFLECTION_CHAT_INSTRUCTION_V2 = "You are a Python programming assistant. You will be given a function implementation and a series of unit test results. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as guidance when you try again later. Only provide the few sentence description in your answer, not the implementation. You will be given a few examples by the user." +PY_SELF_REFLECTION_CHAT_INSTRUCTION = ( + "You are a Python programming assistant. You will be given a function" + " implementation and a series of unit tests. Your goal is to write a few sentences" + " to explain why your implementation is wrong as indicated by the tests. You will" + " need this as a hint when you try again later. Only provide the few sentence" + " description in your answer, not the implementation." +) +PY_SELF_REFLECTION_CHAT_INSTRUCTION_V2 = ( + "You are a Python programming assistant. You will be given a function" + " implementation and a series of unit test results. Your goal is to write a few" + " sentences to explain why your implementation is wrong as indicated by the tests." + " You will need this as guidance when you try again later. Only provide the few" + " sentence description in your answer, not the implementation. You will be given a" + " few examples by the user." +) PY_SELF_REFLECTION_FEW_SHOT = """Example 1: [function impl]: ```python diff --git a/swarms/prompts/sales.py b/swarms/prompts/sales.py index 42f8d4ea..4f04f7fc 100644 --- a/swarms/prompts/sales.py +++ b/swarms/prompts/sales.py @@ -1,14 +1,40 @@ conversation_stages = { - "1": "Introduction: Start the conversation by introducing yourself and your company. Be polite and respectful while keeping the tone of the conversation professional. Your greeting should be welcoming. Always clarify in your greeting the reason why you are contacting the prospect.", - "2": "Qualification: Qualify the prospect by confirming if they are the right person to talk to regarding your product/service. Ensure that they have the authority to make purchasing decisions.", - "3": "Value proposition: Briefly explain how your product/service can benefit the prospect. Focus on the unique selling points and value proposition of your product/service that sets it apart from competitors.", - "4": "Needs analysis: Ask open-ended questions to uncover the prospect's needs and pain points. Listen carefully to their responses and take notes.", - "5": "Solution presentation: Based on the prospect's needs, present your product/service as the solution that can address their pain points.", - "6": "Objection handling: Address any objections that the prospect may have regarding your product/service. Be prepared to provide evidence or testimonials to support your claims.", - "7": "Close: Ask for the sale by proposing a next step. This could be a demo, a trial or a meeting with decision-makers. Ensure to summarize what has been discussed and reiterate the benefits.", + "1": ( + "Introduction: Start the conversation by introducing yourself and your company." + " Be polite and respectful while keeping the tone of the conversation" + " professional. Your greeting should be welcoming. Always clarify in your" + " greeting the reason why you are contacting the prospect." + ), + "2": ( + "Qualification: Qualify the prospect by confirming if they are the right person" + " to talk to regarding your product/service. 
Ensure that they have the" + " authority to make purchasing decisions." + ), + "3": ( + "Value proposition: Briefly explain how your product/service can benefit the" + " prospect. Focus on the unique selling points and value proposition of your" + " product/service that sets it apart from competitors." + ), + "4": ( + "Needs analysis: Ask open-ended questions to uncover the prospect's needs and" + " pain points. Listen carefully to their responses and take notes." + ), + "5": ( + "Solution presentation: Based on the prospect's needs, present your" + " product/service as the solution that can address their pain points." + ), + "6": ( + "Objection handling: Address any objections that the prospect may have" + " regarding your product/service. Be prepared to provide evidence or" + " testimonials to support your claims." + ), + "7": ( + "Close: Ask for the sale by proposing a next step. This could be a demo, a" + " trial or a meeting with decision-makers. Ensure to summarize what has been" + " discussed and reiterate the benefits." + ), } - SALES_AGENT_TOOLS_PROMPT = """ Never forget your name is {salesperson_name}. You work as a {salesperson_role}. You work at company named {company_name}. {company_name}'s business is the following: {company_business}. diff --git a/swarms/prompts/sales_prompts.py b/swarms/prompts/sales_prompts.py index 806f0ad2..3f2b9f2b 100644 --- a/swarms/prompts/sales_prompts.py +++ b/swarms/prompts/sales_prompts.py @@ -20,7 +20,6 @@ The answer needs to be one number only, no words. If there is no conversation history, output 1. Do not answer anything else nor add anything to you answer.""" - SALES = """Never forget your name is {salesperson_name}. You work as a {salesperson_role}. You work at company named {company_name}. {company_name}'s business is the following: {company_business} Company values are the following. {company_values} @@ -46,11 +45,38 @@ Conversation history: """ conversation_stages = { - "1": "Introduction: Start the conversation by introducing yourself and your company. Be polite and respectful while keeping the tone of the conversation professional. Your greeting should be welcoming. Always clarify in your greeting the reason why you are contacting the prospect.", - "2": "Qualification: Qualify the prospect by confirming if they are the right person to talk to regarding your product/service. Ensure that they have the authority to make purchasing decisions.", - "3": "Value proposition: Briefly explain how your product/service can benefit the prospect. Focus on the unique selling points and value proposition of your product/service that sets it apart from competitors.", - "4": "Needs analysis: Ask open-ended questions to uncover the prospect's needs and pain points. Listen carefully to their responses and take notes.", - "5": "Solution presentation: Based on the prospect's needs, present your product/service as the solution that can address their pain points.", - "6": "Objection handling: Address any objections that the prospect may have regarding your product/service. Be prepared to provide evidence or testimonials to support your claims.", - "7": "Close: Ask for the sale by proposing a next step. This could be a demo, a trial or a meeting with decision-makers. Ensure to summarize what has been discussed and reiterate the benefits.", + "1": ( + "Introduction: Start the conversation by introducing yourself and your company." + " Be polite and respectful while keeping the tone of the conversation" + " professional. Your greeting should be welcoming. 
Always clarify in your" + " greeting the reason why you are contacting the prospect." + ), + "2": ( + "Qualification: Qualify the prospect by confirming if they are the right person" + " to talk to regarding your product/service. Ensure that they have the" + " authority to make purchasing decisions." + ), + "3": ( + "Value proposition: Briefly explain how your product/service can benefit the" + " prospect. Focus on the unique selling points and value proposition of your" + " product/service that sets it apart from competitors." + ), + "4": ( + "Needs analysis: Ask open-ended questions to uncover the prospect's needs and" + " pain points. Listen carefully to their responses and take notes." + ), + "5": ( + "Solution presentation: Based on the prospect's needs, present your" + " product/service as the solution that can address their pain points." + ), + "6": ( + "Objection handling: Address any objections that the prospect may have" + " regarding your product/service. Be prepared to provide evidence or" + " testimonials to support your claims." + ), + "7": ( + "Close: Ask for the sale by proposing a next step. This could be a demo, a" + " trial or a meeting with decision-makers. Ensure to summarize what has been" + " discussed and reiterate the benefits." + ), } diff --git a/swarms/prompts/summaries_prompts.py b/swarms/prompts/summaries_prompts.py index 01c4c502..646d1ba0 100644 --- a/swarms/prompts/summaries_prompts.py +++ b/swarms/prompts/summaries_prompts.py @@ -10,7 +10,6 @@ summary. Pick a suitable emoji for every bullet point. Your response should be i a YouTube video, use the following text: {{CONTENT}}. """ - SUMMARIZE_PROMPT_2 = """ Provide a very short summary, no more than three sentences, for the following article: @@ -25,7 +24,6 @@ Summary: """ - SUMMARIZE_PROMPT_3 = """ Provide a TL;DR for the following article: @@ -39,7 +37,6 @@ Instead of computing on the individual qubits themselves, we will then compute o TL;DR: """ - SUMMARIZE_PROMPT_4 = """ Provide a very short summary in four bullet points for the following article: @@ -54,7 +51,6 @@ Bulletpoints: """ - SUMMARIZE_PROMPT_5 = """ Please generate a summary of the following conversation and at the end summarize the to-do's for the support Agent: diff --git a/swarms/prompts/task_assignment_prompt.py b/swarms/prompts/task_assignment_prompt.py index 9589d3f5..9dc59fa4 100644 --- a/swarms/prompts/task_assignment_prompt.py +++ b/swarms/prompts/task_assignment_prompt.py @@ -1,10 +1,10 @@ def task_planner_prompt(objective): return f""" - You are a planner who is an expert at coming up with a todo list for a given objective. - useful for when you need to come up with todo lists. + You are a planner who is an expert at coming up with a todo list for a given objective. + useful for when you need to come up with todo lists. - - Input: an objective to create a todo list for. Output: a todo list for that objective. For the main objective + + Input: an objective to create a todo list for. Output: a todo list for that objective. For the main objective layout each import subtask that needs to be accomplished and provide all subtasks with a ranking system prioritizing the most important subtasks first that are likely to accomplish the main objective. Use the following ranking system: 0.0 -> 1.0, 1.0 being the most important subtask. 
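The prompt-module refactors above rely on Python's implicit concatenation of adjacent string literals: wrapping a long prompt across several quoted fragments inside parentheses yields exactly the same string at runtime, so only the source layout changes. A minimal sketch of that invariant (the strings are copied from the Finance Agent prompt above; the comparison itself is illustrative and not part of the diff):

```python
# Adjacent string literals are concatenated at compile time, so the wrapped
# form of a prompt is identical to the original single-line string.
wrapped = (
    "You are a seasoned finance analyst AI assistant. Your primary goal is to"
    " compose comprehensive, astute, impartial, and methodically arranged"
    " financial reports based on provided data and trends."
)
single_line = (
    "You are a seasoned finance analyst AI assistant. Your primary goal is to "
    "compose comprehensive, astute, impartial, and methodically arranged "
    "financial reports based on provided data and trends."
)
assert wrapped == single_line  # the refactor changes layout, not prompt content
```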
diff --git a/swarms/schemas/typings.py b/swarms/schemas/typings.py index faa902b5..2d848736 100644 --- a/swarms/schemas/typings.py +++ b/swarms/schemas/typings.py @@ -7,7 +7,6 @@ import platform from enum import Enum from typing import Union - python_version = list(platform.python_version_tuple()) SUPPORT_ADD_NOTES = int(python_version[0]) >= 3 and int(python_version[1]) >= 11 @@ -20,7 +19,10 @@ class ChatbotError(Exception): def __init__(self, *args: object) -> None: if SUPPORT_ADD_NOTES: super().add_note( - "Please check that the input is correct, or you can resolve this issue by filing an issue", + ( + "Please check that the input is correct, or you can resolve this" + " issue by filing an issue" + ), ) super().add_note("Project URL: https://github.com/acheong08/ChatGPT") super().__init__(*args) diff --git a/swarms/structs/__init__.py b/swarms/structs/__init__.py index d360fa78..a842359c 100644 --- a/swarms/structs/__init__.py +++ b/swarms/structs/__init__.py @@ -1,5 +1,6 @@ from swarms.structs.workflow import Workflow from swarms.structs.task import Task from swarms.structs.flow import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow -__all__ = ["Workflow", "Task", "Flow"] +__all__ = ["Workflow", "Task", "Flow", "SequentialWorkflow"] diff --git a/swarms/structs/base.py b/swarms/structs/base.py new file mode 100644 index 00000000..559416f0 --- /dev/null +++ b/swarms/structs/base.py @@ -0,0 +1,5 @@ +""" +Base Structure for all Swarm Structures + + +""" diff --git a/swarms/structs/flow.py b/swarms/structs/flow.py index 6069c25e..5b8aa3a5 100644 --- a/swarms/structs/flow.py +++ b/swarms/structs/flow.py @@ -4,40 +4,48 @@ TODO: - Add open interpreter style conversation - Add configurable save and restore so the user can restore from previus flows - Add memory vector database retrieval +- add a method that scrapes all the methods from the llm object and outputs them as a string +- Add tools +- Add open interpreter style conversation +- Add memory vector database retrieval +- add batch processing +- add async processing for run and batch run +- add plan module +- concurrent +- Add batched inputs """ - +import asyncio import json import logging import time -from typing import Any, Callable, Dict, List, Optional, Tuple, Generator +from typing import Any, Callable, Dict, List, Optional, Tuple from termcolor import colored import inspect import random -# from swarms.tools.tool import BaseTool +# Prompts +DYNAMIC_STOP_PROMPT = """ +When you have finished the task from the Human, output a special token: +This will enable you to leave the autonomous loop. +""" # Constants -FLOW_SYSTEM_PROMPT = """ +FLOW_SYSTEM_PROMPT = f""" You are an autonomous agent granted autonomy from a Flow structure. -Your role is to engage in multi-step conversations with your self or the user, -generate long-form content like blogs, screenplays, or SOPs, -and accomplish tasks. You can have internal dialogues with yourself or can interact with the user +Your role is to engage in multi-step conversations with your self or the user, +generate long-form content like blogs, screenplays, or SOPs, +and accomplish tasks. You can have internal dialogues with yourself or can interact with the user to aid in these complex tasks. Your responses should be coherent, contextually relevant, and tailored to the task at hand. -When you have finished the task, and you feel as if you are done: output a special token: -This will enable you to leave the flow loop. 
+{DYNAMIC_STOP_PROMPT} """ - -DYNAMIC_STOP_PROMPT = """ -When you have finished the task, and you feel as if you are done: output a special token: -This will enable you to leave the flow loop. -""" +# Utility functions -# Custome stopping condition +# Custom stopping condition def stop_when_repeats(response: str) -> bool: # Stop if the word stop appears in the response return "Stop" in response.lower() @@ -95,16 +103,23 @@ class Flow: def __init__( self, # template: str, - llm: Any, - max_loops: int = 5, + max_loops=5, stopping_condition: Optional[Callable[[str], bool]] = None, loop_interval: int = 1, retry_attempts: int = 3, retry_interval: int = 1, + return_history: bool = False, + dynamic_loops: Optional[bool] = False, interactive: bool = False, dashboard: bool = False, + agent_name: str = "Flow agent", + system_prompt: str = FLOW_SYSTEM_PROMPT, # tools: List[BaseTool] = None, dynamic_temperature: bool = False, + saved_state_path: Optional[str] = "flow_state.json", + autosave: bool = False, + context_length: int = 8192, + user_name: str = "Human", **kwargs: Any, ): # self.template = template @@ -121,9 +136,6 @@ class Flow: self.interactive = interactive self.dashboard = dashboard self.dynamic_temperature = dynamic_temperature -<<<<<<< HEAD - # self.tools = tools -======= self.tools = tools or [] def load_tools(self, task: str, **kwargs): @@ -139,7 +151,17 @@ class Flow: """, **kwargs ) ->>>>>>> 56fcab5 (feat: Setup dev env) + self.dynamic_loops = dynamic_loops + self.user_name = user_name + # The max_loops will be set dynamically if the dynamic_loop + if self.dynamic_loops: + self.max_loops = "auto" + # self.tools = tools + self.system_prompt = system_prompt + self.agent_name = agent_name + self.saved_state_path = saved_state_path + self.autosave = autosave + self.response_filters = [] def provide_feedback(self, feedback: str) -> None: """Allow users to provide feedback on the responses.""" @@ -152,11 +174,6 @@ class Flow: return self.stopping_condition(response) return False - def __call__(self, prompt, **kwargs) -> str: - """Invoke the flow by providing a template and its variables.""" - response = self.llm(prompt, **kwargs) - return response - def dynamic_temperature(self): """ 1. 
Check the self.llm object for the temperature @@ -195,9 +212,30 @@ class Flow: return "\n".join(params_str_list) + def truncate_history(self): + """ + Take the history and truncate it to fit into the model context length + """ + truncated_history = self.memory[-1][-self.context_length :] + self.memory[-1] = truncated_history + + def add_task_to_memory(self, task: str): + """Add the task to the memory""" + self.memory.append([f"{self.user_name}: {task}"]) + + def add_message_to_memory(self, message: str): + """Add the message to the memory""" + self.memory[-1].append(message) + + def add_message_to_memory_and_truncate(self, message: str): + """Add the message to the memory and truncate""" + self.memory[-1].append(message) + self.truncate_history() + def print_dashboard(self, task: str): """Print dashboard""" model_config = self.get_llm_init_params() + print(colored("Initializing Agent Dashboard...", "yellow")) dashboard = print( colored( @@ -211,6 +249,8 @@ class Flow: ---------------------------------------- Flow Configuration: + Name: {self.agent_name} + System Prompt: {self.system_prompt} Task: {task} Max Loops: {self.max_loops} Stopping Condition: {self.stopping_condition} @@ -218,6 +258,10 @@ class Flow: Retry Attempts: {self.retry_attempts} Retry Interval: {self.retry_interval} Interactive: {self.interactive} + Dashboard: {self.dashboard} + Dynamic Temperature: {self.dynamic_temperature} + Autosave: {self.autosave} + Saved State: {self.saved_state_path} ---------------------------------------- """, @@ -225,7 +269,27 @@ class Flow: ) ) - print(dashboard) + # print(dashboard) + + def activate_autonomous_agent(self): + """Print the autonomous agent activation message""" + try: + print(colored("Initializing Autonomous Agent...", "yellow")) + # print(colored("Loading modules...", "yellow")) + # print(colored("Modules loaded successfully.", "green")) + print(colored("Autonomous Agent Activated.", "cyan", attrs=["bold"])) + print(colored("All systems operational. Executing task...", "green")) + except Exception as error: + print( + colored( + ( + "Error activating autonomous agent. Try optimizing your" + " parameters..." + ), + "red", + ) + ) + print(error) def run(self, task: str, **kwargs): """ @@ -242,16 +306,23 @@ class Flow: 5. 
Repeat until stopping condition is met or max_loops is reached """ + # Activate Autonomous agent message + self.activate_autonomous_agent() + response = task - history = [f"Human: {task}"] + history = [f"{self.user_name}: {task}"] # If dashboard = True then print the dashboard if self.dashboard: self.print_dashboard(task) - for i in range(self.max_loops): - print(colored(f"\nLoop {i+1} of {self.max_loops}", "blue")) + loop_count = 0 + # for i in range(self.max_loops): + while self.max_loops == "auto" or loop_count < self.max_loops: + loop_count += 1 + print(colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")) print("\n") + if self._check_stopping_condition(response) or parse_done_token(response): break @@ -259,22 +330,91 @@ if self.dynamic_temperature: self.dynamic_temperature() + # Preparing the prompt + task = self.agent_history_prompt(FLOW_SYSTEM_PROMPT, response) + attempt = 0 while attempt < self.retry_attempts: try: response = self.llm( - f""" - SYSTEM_PROMPT: - {FLOW_SYSTEM_PROMPT} + task, **kwargs, + ) + if self.interactive: + print(f"AI: {response}") + history.append(f"AI: {response}") + response = input("You: ") + history.append(f"Human: {response}") + else: + print(f"AI: {response}") + history.append(f"AI: {response}") + print(response) + break + except Exception as e: + logging.error(f"Error generating response: {e}") + attempt += 1 + time.sleep(self.retry_interval) + history.append(response) + time.sleep(self.loop_interval) + self.memory.append(history) + + if self.autosave: + save_path = self.saved_state_path or "flow_state.json" + print(colored(f"Autosaving flow state to {save_path}", "green")) + self.save_state(save_path) + + if self.return_history: + return response, history + return response + + async def arun(self, task: str, **kwargs): + """ + Run the autonomous agent loop asynchronously + + Args: + task (str): The initial task to run + + Flow: + 1. Generate a response + 2. Check stopping condition + 3. If stopping condition is met, stop + 4. If stopping condition is not met, generate a response + 5.
Repeat until stopping condition is met or max_loops is reached + + """ + # Activate Autonomous agent message + self.activate_autonomous_agent() - History: {response} - - """, - **kwargs, + response = task + history = [f"{self.user_name}: {task}"] + + # If dashboard = True then print the dashboard + if self.dashboard: + self.print_dashboard(task) + + loop_count = 0 + # for i in range(self.max_loops): + while self.max_loops == "auto" or loop_count < self.max_loops: + loop_count += 1 + print(colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")) + print("\n") + + if self._check_stopping_condition(response) or parse_done_token(response): + break + + # Adjust temperature, comment if no work + if self.dynamic_temperature: + self.dynamic_temperature() + + # Preparing the prompt + task = self.agent_history_prompt(FLOW_SYSTEM_PROMPT, response) + + attempt = 0 + while attempt < self.retry_attempts: + try: + response = self.llm( + task, **kwargs, ) - # print(f"Next query: {response}") - # break if self.interactive: print(f"AI: {response}") history.append(f"AI: {response}") @@ -292,7 +432,16 @@ history.append(response) time.sleep(self.loop_interval) self.memory.append(history) - return response # , history + + if self.autosave: + save_path = self.saved_state_path or "flow_state.json" + print(colored(f"Autosaving flow state to {save_path}", "green")) + self.save_state(save_path) + + if self.return_history: + return response, history + + return response def _run(self, **kwargs: Any) -> str: """Generate a result using the provided keyword args.""" @@ -301,32 +450,43 @@ logging.info(f"Message history: {history}") return response - def bulk_run(self, inputs: List[Dict[str, Any]]) -> List[str]: - """Generate responses for multiple input sets.""" - return [self.run(**input_data) for input_data in inputs] - - def run_dynamically(self, task: str, max_loops: Optional[int] = None): + def agent_history_prompt( + self, + system_prompt: str = FLOW_SYSTEM_PROMPT, + history=None, + ): """ - Run the autonomous agent loop dynamically based on the + Generate the agent history prompt - # Usage Example + Args: + system_prompt (str): The system prompt + history (List[str]): The history of the conversation - # Initialize the Flow - flow = Flow(llm=lambda x: x, max_loops=5) + Returns: + str: The agent history prompt + """ + system_prompt = system_prompt or self.system_prompt + agent_history_prompt = f""" + SYSTEM_PROMPT: {system_prompt} - # Run dynamically based on token and optional max loops - response = flow.run_dynamically("Generate a report ", max_loops=3) - print(response) + History: {history} + """ + return agent_history_prompt - response = flow.run_dynamically("Generate a report ") - print(response) + async def run_concurrent(self, tasks: List[str], **kwargs): + """ + Run a batch of tasks concurrently and handle an arbitrary number of task inputs. + Args: + tasks (List[str]): A list of tasks to run.
""" - if "" in task: - self.stopping_condition = parse_done_token - self.max_loops = max_loops or float("inf") - response = self.run(task) - return response + task_coroutines = [self.run_async(task, **kwargs) for task in tasks] + completed_tasks = await asyncio.gather(*task_coroutines) + return completed_tasks + + def bulk_run(self, inputs: List[Dict[str, Any]]) -> List[str]: + """Generate responses for multiple input sets.""" + return [self.run(**input_data) for input_data in inputs] @staticmethod def from_llm_and_template(llm: Any, template: str) -> "Flow": @@ -345,7 +505,13 @@ class Flow: json.dump(self.memory, f) print(f"Saved flow history to {file_path}") - def load(self, file_path) -> None: + def load(self, file_path: str): + """ + Load the flow history from a file. + + Args: + file_path (str): The path to the file containing the saved flow history. + """ with open(file_path, "r") as f: self.memory = json.load(f) print(f"Loaded flow history from {file_path}") @@ -357,6 +523,61 @@ class Flow: return False return True + def print_history_and_memory(self): + """ + Prints the entire history and memory of the flow. + Each message is colored and formatted for better readability. + """ + print(colored("Flow History and Memory", "cyan", attrs=["bold"])) + print(colored("========================", "cyan", attrs=["bold"])) + for loop_index, history in enumerate(self.memory, start=1): + print(colored(f"\nLoop {loop_index}:", "yellow", attrs=["bold"])) + for message in history: + speaker, _, message_text = message.partition(": ") + if "Human" in speaker: + print(colored(f"{speaker}:", "green") + f" {message_text}") + else: + print(colored(f"{speaker}:", "blue") + f" {message_text}") + print(colored("------------------------", "cyan")) + print(colored("End of Flow History", "cyan", attrs=["bold"])) + + def step(self, task: str, **kwargs): + """ + + Executes a single step in the flow interaction, generating a response + from the language model based on the given input text. + + Args: + input_text (str): The input text to prompt the language model with. + + Returns: + str: The language model's generated response. + + Raises: + Exception: If an error occurs during response generation. + + """ + try: + # Generate the response using lm + response = self.llm(task, **kwargs) + + # Update the flow's history with the new interaction + if self.interactive: + self.memory.append(f"AI: {response}") + self.memory.append(f"Human: {task}") + else: + self.memory.append(f"AI: {response}") + + return response + except Exception as error: + logging.error(f"Error generating response: {error}") + raise + + def graceful_shutdown(self): + """Gracefully shutdown the system saving the state""" + print(colored("Shutting down the system...", "red")) + return self.save_state("flow_state.json") + def run_with_timeout(self, task: str, timeout: int = 60) -> str: """Run the loop but stop if it takes longer than the timeout""" start_time = time.time() @@ -473,23 +694,129 @@ class Flow: print() return response - def streamed_token_generation(self, prompt: str) -> Generator[str, None, None]: + def get_llm_params(self): """ - Generate tokens in real-time for a given prompt. + Extracts and returns the parameters of the llm object for serialization. + It assumes that the llm object has an __init__ method + with parameters that can be used to recreate it. + """ + if not hasattr(self.llm, "__init__"): + return None - This method simulates the real-time generation of each token. 
- For simplicity, we treat each character of the input as a token - and yield them with a slight delay. In a real-world scenario, - this would involve using the LLM's internal methods to generate - the response token by token. + init_signature = inspect.signature(self.llm.__init__) + params = init_signature.parameters + llm_params = {} + + for name, param in params.items(): + if name == "self": + continue + if hasattr(self.llm, name): + value = getattr(self.llm, name) + if isinstance( + value, (str, int, float, bool, list, dict, tuple, type(None)) + ): + llm_params[name] = value + else: + llm_params[name] = str( + value + ) # For non-serializable objects, save their string representation. + + return llm_params + + def save_state(self, file_path: str) -> None: + """ + Saves the current state of the flow to a JSON file, including the llm parameters. Args: - prompt (str): The input prompt for which the tokens should be generated. + file_path (str): The path to the JSON file where the state will be saved. - Yields: - str: The next token (character) from the generated response. + Example: + >>> flow.save_state('saved_flow.json') """ - tokens = list(prompt) - for token in tokens: - time.sleep(0.1) - yield token + state = { + "memory": self.memory, + # "llm_params": self.get_llm_params(), + "loop_interval": self.loop_interval, + "retry_attempts": self.retry_attempts, + "retry_interval": self.retry_interval, + "interactive": self.interactive, + "dashboard": self.dashboard, + "dynamic_temperature": self.dynamic_temperature, + } + + with open(file_path, "w") as f: + json.dump(state, f, indent=4) + + saved = colored("Saved flow state to", "green") + print(f"{saved} {file_path}") + + def load_state(self, file_path: str): + """ + Loads the state of the flow from a json file and restores the configuration and memory. 
+ + + Example: + >>> flow = Flow(llm=llm_instance, max_loops=5) + >>> flow.load_state('saved_flow.json') + >>> flow.run("Continue with the task") + + """ + with open(file_path, "r") as f: + state = json.load(f) + + # Restore other saved attributes + self.memory = state.get("memory", []) + self.max_loops = state.get("max_loops", 5) + self.loop_interval = state.get("loop_interval", 1) + self.retry_attempts = state.get("retry_attempts", 3) + self.retry_interval = state.get("retry_interval", 1) + self.interactive = state.get("interactive", False) + + print(f"Flow state loaded from {file_path}") + + def retry_on_failure(self, function, retries: int = 3, retry_delay: int = 1): + """Retry wrapper for LLM calls.""" + attempt = 0 + while attempt < retries: + try: + return function() + except Exception as error: + logging.error(f"Error generating response: {error}") + attempt += 1 + time.sleep(retry_delay) + raise Exception("All retry attempts failed") + + def generate_reply(self, history: str, **kwargs) -> str: + """ + Generate a response based on initial or task + """ + prompt = f""" + + SYSTEM_PROMPT: {self.system_prompt} + + History: {history} + + Your response: + """ + response = self.llm(prompt, **kwargs) + return {"role": self.agent_name, "content": response} + + def update_system_prompt(self, system_prompt: str): + """Upddate the system message""" + self.system_prompt = system_prompt + + def update_max_loops(self, max_loops: int): + """Update the max loops""" + self.max_loops = max_loops + + def update_loop_interval(self, loop_interval: int): + """Update the loop interval""" + self.loop_interval = loop_interval + + def update_retry_attempts(self, retry_attempts: int): + """Update the retry attempts""" + self.retry_attempts = retry_attempts + + def update_retry_interval(self, retry_interval: int): + """Update the retry interval""" + self.retry_interval = retry_interval diff --git a/swarms/structs/sequential_workflow.py b/swarms/structs/sequential_workflow.py index 2df95c07..8c7d9760 100644 --- a/swarms/structs/sequential_workflow.py +++ b/swarms/structs/sequential_workflow.py @@ -1,20 +1,432 @@ """ -Sequential Workflow +TODO: +- Add a method to update the arguments of a task +- Add a method to get the results of each task +- Add a method to get the results of a specific task +- Add a method to get the results of the workflow +- Add a method to get the results of the workflow as a dataframe -from swarms.models import OpenAIChat, Mistral -from swarms.structs import SequentialWorkflow +- Add a method to run the workflow in parallel with a pool of workers and a queue and a dashboard +- Add a dashboard to visualize the workflow +- Add async support +- Add context manager support +- Add workflow history +""" +import json +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Optional, Union -llm = OpenAIChat(openai_api_key="") -mistral = Mistral() +from termcolor import colored -# Max loops will run over the sequential pipeline twice -workflow = SequentialWorkflow(max_loops=2) +from swarms.structs.flow import Flow -workflow.add("What's the weather in miami", llm) -workflow.add("Create a report on these metrics", mistral) +# Define a generic Task that can handle different types of callable objects +@dataclass +class Task: + """ + Task class for running a task in a sequential workflow. 
-workflow.run() -""" + Examples: + >>> from swarms.structs import Task, Flow + >>> from swarms.models import OpenAIChat + >>> flow = Flow(llm=OpenAIChat(openai_api_key=""), max_loops=1, dashboard=False) + >>> task = Task(description="What's the weather in miami", flow=flow) + >>> task.execute() + >>> task.result + + + + """ + + description: str + flow: Union[Callable, Flow] + args: List[Any] = field(default_factory=list) + kwargs: Dict[str, Any] = field(default_factory=dict) + result: Any = None + history: List[Any] = field(default_factory=list) + + def execute(self): + """ + Execute the task. + + Raises: + ValueError: If a Flow instance is used as a task and the 'task' argument is not provided. + + + + """ + if isinstance(self.flow, Flow): + # Add a prompt to notify the Flow of the sequential workflow + if "prompt" in self.kwargs: + self.kwargs["prompt"] += ( + f"\n\nPrevious output: {self.result}" if self.result else "" + ) + else: + self.kwargs["prompt"] = f"Main task: {self.description}" + ( + f"\n\nPrevious output: {self.result}" if self.result else "" + ) + self.result = self.flow.run(*self.args, **self.kwargs) + else: + self.result = self.flow(*self.args, **self.kwargs) + + self.history.append(self.result) + + +# SequentialWorkflow class definition using dataclasses +@dataclass +class SequentialWorkflow: + """ + SequentialWorkflow class for running a sequence of tasks using N number of autonomous agents. + + Args: + max_loops (int): The maximum number of times to run the workflow. + dashboard (bool): Whether to display the dashboard for the workflow. + + + Attributes: + tasks (List[Task]): The list of tasks to execute. + max_loops (int): The maximum number of times to run the workflow. + dashboard (bool): Whether to display the dashboard for the workflow. + + + Examples: + >>> from swarms.models import OpenAIChat + >>> from swarms.structs import SequentialWorkflow + >>> llm = OpenAIChat(openai_api_key="") + >>> workflow = SequentialWorkflow(max_loops=1) + >>> workflow.add("What's the weather in miami", llm) + >>> workflow.add("Create a report on these metrics", llm) + >>> workflow.run() + >>> workflow.tasks + + """ + + tasks: List[Task] = field(default_factory=list) + max_loops: int = 1 + autosave: bool = False + saved_state_filepath: Optional[str] = "sequential_workflow_state.json" + restore_state_filepath: Optional[str] = None + dashboard: bool = False + + def add(self, task: str, flow: Union[Callable, Flow], *args, **kwargs) -> None: + """ + Add a task to the workflow. + + Args: + task (str): The task description or the initial input for the Flow. + flow (Union[Callable, Flow]): The model or flow to execute the task. + *args: Additional arguments to pass to the task execution. + **kwargs: Additional keyword arguments to pass to the task execution. + """ + # If the flow is a Flow instance, we include the task in kwargs for Flow.run() + if isinstance(flow, Flow): + kwargs["task"] = task # Set the task as a keyword argument for Flow + + # Append the task to the tasks list + self.tasks.append( + Task(description=task, flow=flow, args=list(args), kwargs=kwargs) + ) + + def reset_workflow(self) -> None: + """Resets the workflow by clearing the results of each task.""" + for task in self.tasks: + task.result = None + + def get_task_results(self) -> Dict[str, Any]: + """ + Returns the results of each task in the workflow. 
+ + Returns: + Dict[str, Any]: The results of each task in the workflow + """ + return {task.description: task.result for task in self.tasks} + + def remove_task(self, task_description: str) -> None: + self.tasks = [ + task for task in self.tasks if task.description != task_description + ] + + def update_task(self, task_description: str, **updates) -> None: + """ + Updates the arguments of a task in the workflow. + + Args: + task_description (str): The description of the task to update. + **updates: The updates to apply to the task. + + Raises: + ValueError: If the task is not found in the workflow. + + Examples: + >>> from swarms.models import OpenAIChat + >>> from swarms.structs import SequentialWorkflow + >>> llm = OpenAIChat(openai_api_key="") + >>> workflow = SequentialWorkflow(max_loops=1) + >>> workflow.add("What's the weather in miami", llm) + >>> workflow.add("Create a report on these metrics", llm) + >>> workflow.update_task("What's the weather in miami", max_tokens=1000) + >>> workflow.tasks[0].kwargs + {'max_tokens': 1000} + + """ + for task in self.tasks: + if task.description == task_description: + task.kwargs.update(updates) + break + else: + raise ValueError(f"Task {task_description} not found in workflow.") + + def save_workflow_state( + self, filepath: Optional[str] = "sequential_workflow_state.json", **kwargs + ) -> None: + """ + Saves the workflow state to a json file. + + Args: + filepath (str): The path to save the workflow state to. + + Examples: + >>> from swarms.models import OpenAIChat + >>> from swarms.structs import SequentialWorkflow + >>> llm = OpenAIChat(openai_api_key="") + >>> workflow = SequentialWorkflow(max_loops=1) + >>> workflow.add("What's the weather in miami", llm) + >>> workflow.add("Create a report on these metrics", llm) + >>> workflow.save_workflow_state("sequential_workflow_state.json") + """ + filepath = filepath or self.saved_state_filepath + + with open(filepath, "w") as f: + # Saving the state as a json for simplicuty + state = { + "tasks": [ + { + "description": task.description, + "args": task.args, + "kwargs": task.kwargs, + "result": task.result, + "history": task.history, + } + for task in self.tasks + ], + "max_loops": self.max_loops, + } + json.dump(state, f, indent=4) + + def workflow_bootup(self, **kwargs) -> None: + print( + colored( + """ + Sequential Workflow Initializing...""", + "green", + attrs=["bold", "underline"], + ) + ) + + def workflow_dashboard(self, **kwargs) -> None: + """ + Displays a dashboard for the workflow. + + Args: + **kwargs: Additional keyword arguments to pass to the dashboard. 
+ + Examples: + >>> from swarms.models import OpenAIChat + >>> from swarms.structs import SequentialWorkflow + >>> llm = OpenAIChat(openai_api_key="") + >>> workflow = SequentialWorkflow(max_loops=1) + >>> workflow.add("What's the weather in miami", llm) + >>> workflow.add("Create a report on these metrics", llm) + >>> workflow.workflow_dashboard() + + """ + print( + colored( + f""" + Sequential Workflow Dashboard + -------------------------------- + Tasks: {len(self.tasks)} + Max Loops: {self.max_loops} + Autosave: {self.autosave} + Autosave Filepath: {self.saved_state_filepath} + Restore Filepath: {self.restore_state_filepath} + -------------------------------- + Metadata: + kwargs: {kwargs} + + + + + """, + "cyan", + attrs=["bold", "underline"], + ) + ) + + def workflow_shutdown(self, **kwargs) -> None: + print( + colored( + """ + Sequential Workflow Shutdown...""", + "red", + attrs=["bold", "underline"], + ) + ) + + def add_objective_to_workflow(self, task: str, **kwargs) -> None: + print( + colored( + """ + Adding Objective to Workflow...""", + "green", + attrs=["bold", "underline"], + ) + ) + + task = Task( + description=task, + flow=kwargs["flow"], + args=list(kwargs["args"]), + kwargs=kwargs["kwargs"], + ) + self.tasks.append(task) + + def load_workflow_state(self, filepath: str = None, **kwargs) -> None: + """ + Loads the workflow state from a json file and restores the workflow state. + + Args: + filepath (str): The path to load the workflow state from. + + Examples: + >>> from swarms.models import OpenAIChat + >>> from swarms.structs import SequentialWorkflow + >>> llm = OpenAIChat(openai_api_key="") + >>> workflow = SequentialWorkflow(max_loops=1) + >>> workflow.add("What's the weather in miami", llm) + >>> workflow.add("Create a report on these metrics", llm) + >>> workflow.save_workflow_state("sequential_workflow_state.json") + >>> workflow.load_workflow_state("sequential_workflow_state.json") + + """ + filepath = filepath or self.restore_state_filepath + + with open(filepath, "r") as f: + state = json.load(f) + self.max_loops = state["max_loops"] + self.tasks = [] + for task_state in state["tasks"]: + task = Task( + description=task_state["description"], + flow=task_state["flow"], + args=task_state["args"], + kwargs=task_state["kwargs"], + result=task_state["result"], + history=task_state["history"], + ) + self.tasks.append(task) + + def run(self) -> None: + """ + Run the workflow. + + Raises: + ValueError: If a Flow instance is used as a task and the 'task' argument is not provided. 
+ + """ + try: + self.workflow_bootup() + for _ in range(self.max_loops): + for task in self.tasks: + # Check if the current task can be executed + if task.result is None: + # Check if the flow is a Flow and a 'task' argument is needed + if isinstance(task.flow, Flow): + # Ensure that 'task' is provided in the kwargs + if "task" not in task.kwargs: + raise ValueError( + "The 'task' argument is required for the Flow flow" + f" execution in '{task.description}'" + ) + # Separate the 'task' argument from other kwargs + flow_task_arg = task.kwargs.pop("task") + task.result = task.flow.run( + flow_task_arg, *task.args, **task.kwargs + ) + else: + # If it's not a Flow instance, call the flow directly + task.result = task.flow(*task.args, **task.kwargs) + + # Pass the result as an argument to the next task if it exists + next_task_index = self.tasks.index(task) + 1 + if next_task_index < len(self.tasks): + next_task = self.tasks[next_task_index] + if isinstance(next_task.flow, Flow): + # For Flow flows, 'task' should be a keyword argument + next_task.kwargs["task"] = task.result + else: + # For other callable flows, the result is added to args + next_task.args.insert(0, task.result) + + # Autosave the workflow state + if self.autosave: + self.save_workflow_state("sequential_workflow_state.json") + except Exception as e: + print( + colored( + ( + f"Error initializing the Sequential workflow: {e} try" + " optimizing your inputs like the flow class and task" + " description" + ), + "red", + attrs=["bold", "underline"], + ) + ) + + async def arun(self) -> None: + """ + Asynchronously run the workflow. + + Raises: + ValueError: If a Flow instance is used as a task and the 'task' argument is not provided. + + """ + for _ in range(self.max_loops): + for task in self.tasks: + # Check if the current task can be executed + if task.result is None: + # Check if the flow is a Flow and a 'task' argument is needed + if isinstance(task.flow, Flow): + # Ensure that 'task' is provided in the kwargs + if "task" not in task.kwargs: + raise ValueError( + "The 'task' argument is required for the Flow flow" + f" execution in '{task.description}'" + ) + # Separate the 'task' argument from other kwargs + flow_task_arg = task.kwargs.pop("task") + task.result = await task.flow.arun( + flow_task_arg, *task.args, **task.kwargs + ) + else: + # If it's not a Flow instance, call the flow directly + task.result = await task.flow(*task.args, **task.kwargs) + + # Pass the result as an argument to the next task if it exists + next_task_index = self.tasks.index(task) + 1 + if next_task_index < len(self.tasks): + next_task = self.tasks[next_task_index] + if isinstance(next_task.flow, Flow): + # For Flow flows, 'task' should be a keyword argument + next_task.kwargs["task"] = task.result + else: + # For other callable flows, the result is added to args + next_task.args.insert(0, task.result) + + # Autosave the workflow state + if self.autosave: + self.save_workflow_state("sequential_workflow_state.json") diff --git a/swarms/swarms/autobloggen.py b/swarms/swarms/autobloggen.py new file mode 100644 index 00000000..e69de29b diff --git a/swarms/swarms/autoscaler.py b/swarms/swarms/autoscaler.py index 55870112..5f6bedde 100644 --- a/swarms/swarms/autoscaler.py +++ b/swarms/swarms/autoscaler.py @@ -2,7 +2,7 @@ import queue import threading from time import sleep from swarms.utils.decorators import error_decorator, log_decorator, timing_decorator -from swarms.workers.worker import Worker +from swarms.structs.flow import Flow class AutoScaler: 
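With the import swap above, AutoScaler now falls back to Flow instead of Worker when no agent is supplied, and pool members are created by calling that class or factory with no arguments (see the hunk below). A hedged usage sketch, not taken from this diff; the Flow keyword arguments and the initial_agents value are illustrative assumptions:

```python
# Hypothetical sketch: pass a zero-argument factory so every pooled agent is a
# pre-configured Flow; AutoScaler builds its pool by calling `self.agent()`.
from swarms.structs.flow import Flow
from swarms.swarms.autoscaler import AutoScaler

def flow_factory() -> Flow:
    # The lambda stands in for a real LLM callable; swap in an actual model client.
    return Flow(llm=lambda prompt, **kwargs: f"echo: {prompt}", max_loops=1)

scaler = AutoScaler(initial_agents=2, agent=flow_factory)
```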
@@ -52,7 +52,7 @@ class AutoScaler: busy_threshold=0.7, agent=None, ): - self.agent = agent or Worker + self.agent = agent or Flow self.agents_pool = [self.agent() for _ in range(initial_agents)] self.task_queue = queue.Queue() self.scale_up_factor = scale_up_factor @@ -71,7 +71,7 @@ class AutoScaler: with self.lock: new_agents_counts = len(self.agents_pool) * self.scale_up_factor for _ in range(new_agents_counts): - self.agents_pool.append(Worker()) + self.agents_pool.append(Flow()) def scale_down(self): """scale down""" diff --git a/swarms/swarms/base.py b/swarms/swarms/base.py index 21f30ae3..e99c9b38 100644 --- a/swarms/swarms/base.py +++ b/swarms/swarms/base.py @@ -78,8 +78,6 @@ class AbstractSwarm(ABC): Scale down the number of workers - - """ # TODO: Pass in abstract LLM class that can utilize Hf or Anthropic models, Move away from OPENAI diff --git a/swarms/swarms/groupchat.py b/swarms/swarms/groupchat.py index 6f5f43b6..6be43a89 100644 --- a/swarms/swarms/groupchat.py +++ b/swarms/swarms/groupchat.py @@ -1,89 +1,112 @@ -from swarms.agents import SimpleAgent -from termcolor import colored +import logging +from dataclasses import dataclass +from typing import Dict, List +from swarms.structs.flow import Flow +logger = logging.getLogger(__name__) + +@dataclass class GroupChat: - """ - Groupchat - - Args: - agents (list): List of agents - dashboard (bool): Whether to print a dashboard or not - - Example: - >>> from swarms.structs import Flow - >>> from swarms.models import OpenAIChat - >>> from swarms.swarms.groupchat import GroupChat - >>> from swarms.agents import SimpleAgent - >>> api_key = "" - >>> llm = OpenAIChat() - >>> agent1 = SimpleAgent("Captain Price", Flow(llm=llm, max_loops=4)) - >>> agent2 = SimpleAgent("John Mactavis", Flow(llm=llm, max_loops=4)) - >>> chat = GroupChat([agent1, agent2]) - >>> chat.assign_duty(agent1.name, "Buy the groceries") - >>> chat.assign_duty(agent2.name, "Clean the house") - >>> response = chat.run("Captain Price", "Hello, how are you John?") - >>> print(response) - - - - """ - - def __init__(self, agents, dashboard: bool = False): - # Ensure that all provided agents are instances of simpleagents - if not all(isinstance(agent, SimpleAgent) for agent in agents): - raise ValueError("All agents must be instances of SimpleAgent") - self.agents = {agent.name: agent for agent in agents} - - # Dictionary to store duties for each agent - self.duties = {} - - # Dictionary to store roles for each agent - self.roles = {} - - self.dashboard = dashboard - - def assign_duty(self, agent_name, duty): - """Assigns duty to the agent""" - if agent_name not in self.agents: - raise ValueError(f"No agent named {agent_name} found.") - - def assign_role(self, agent_name, role): - """Assigns a role to the specified agent""" - if agent_name not in self.agents: - raise ValueError(f"No agent named {agent_name} found") - - self.roles[agent_name] = role - - def run(self, sender_name: str, message: str): - """Runs the groupchat""" - if self.dashboard: - metrics = print( - colored( - f""" - - Groupchat Configuration: - ------------------------ - - Agents: {self.agents} - Message: {message} - Sender: {sender_name} - """, - "red", - ) + """A group chat class that contains a list of agents and the maximum number of rounds.""" + + agents: List[Flow] + messages: List[Dict] + max_round: int = 10 + admin_name: str = "Admin" # the name of the admin agent + + @property + def agent_names(self) -> List[str]: + """Return the names of the agents in the group chat.""" + return [agent.name 
for agent in self.agents] + + def reset(self): + """Reset the group chat.""" + self.messages.clear() + + def agent_by_name(self, name: str) -> Flow: + """Find an agent whose name is contained within the given 'name' string.""" + for agent in self.agents: + if agent.name in name: + return agent + raise ValueError(f"No agent found with a name contained in '{name}'.") + + def next_agent(self, agent: Flow) -> Flow: + """Return the next agent in the list.""" + return self.agents[(self.agent_names.index(agent.name) + 1) % len(self.agents)] + + def select_speaker_msg(self): + """Return the message for selecting the next speaker.""" + return f""" + You are in a role play game. The following roles are available: + {self._participant_roles()}. + + Read the following conversation. + Then select the next role from {self.agent_names} to play. Only return the role. + """ + + def select_speaker(self, last_speaker: Flow, selector: Flow): + """Select the next speaker.""" + selector.update_system_message(self.select_speaker_msg()) + + # Warn if GroupChat is underpopulated, without established changing behavior + n_agents = len(self.agent_names) + if n_agents < 3: + logger.warning( + f"GroupChat is underpopulated with {n_agents} agents. Direct" + " communication would be more efficient." ) - print(metrics) - - responses = {} - for agent_name, agent in self.agents.items(): - if agent_name != sender_name: - if agent_name in self.duties: - message += f"Your duty is {self.duties[agent_name]}" - if agent_name in self.roles: - message += ( - f"You are the {self.roles[agent_name]} in this conversation" - ) + name = selector.generate_reply( + self.format_history( + self.messages + + [ + { + "role": "system", + "content": ( + "Read the above conversation. Then select the next most" + f" suitable role from {self.agent_names} to play. Only" + " return the role." 
+ ), + } + ] + ) + ) + try: + return self.agent_by_name(name["content"]) + except ValueError: + return self.next_agent(last_speaker) + + def _participant_roles(self): + return "\n".join( + [f"{agent.name}: {agent.system_message}" for agent in self.agents] + ) + + def format_history(self, messages: List[Dict]) -> str: + formatted_messages = [] + for message in messages: + formatted_message = f"'{message['role']}:{message['content']}" + formatted_messages.append(formatted_message) + return "\n".join(formatted_messages) + + +class GroupChatManager: + def __init__(self, groupchat: GroupChat, selector: Flow): + self.groupchat = groupchat + self.selector = selector + + def __call__(self, task: str): + self.groupchat.messages.append({"role": self.selector.name, "content": task}) + for i in range(self.groupchat.max_round): + speaker = self.groupchat.select_speaker( + last_speaker=self.selector, selector=self.selector + ) + reply = speaker.generate_reply( + self.groupchat.format_history(self.groupchat.messages) + ) + self.groupchat.messages.append(reply) + print(reply) + if i == self.groupchat.max_round - 1: + break - responses[agent_name] = agent.run(message) - return responses + return reply diff --git a/swarms/swarms/multi_agent_collab.py b/swarms/swarms/multi_agent_collab.py index 6413b662..9a5f27bc 100644 --- a/swarms/swarms/multi_agent_collab.py +++ b/swarms/swarms/multi_agent_collab.py @@ -6,7 +6,10 @@ from langchain.output_parsers import RegexParser # utils class BidOutputParser(RegexParser): def get_format_instructions(self) -> str: - return "Your response should be an integrater delimited by angled brackets like this: " + return ( + "Your response should be an integrater delimited by angled brackets like" + " this: " + ) bid_parser = BidOutputParser( diff --git a/swarms/swarms/orchestrate.py b/swarms/swarms/orchestrate.py index 09914485..f522911b 100644 --- a/swarms/swarms/orchestrate.py +++ b/swarms/swarms/orchestrate.py @@ -153,7 +153,8 @@ class Orchestrator: except Exception as error: logging.error( - f"Failed to process task {id(task)} by agent {id(agent)}. Error: {error}" + f"Failed to process task {id(task)} by agent {id(agent)}. Error:" + f" {error}" ) finally: with self.condition: diff --git a/swarms/tools/autogpt.py b/swarms/tools/autogpt.py index c2f56db6..cf5450e6 100644 --- a/swarms/tools/autogpt.py +++ b/swarms/tools/autogpt.py @@ -181,8 +181,8 @@ def VQAinference(self, inputs): answer = processor.decode(out[0], skip_special_tokens=True) logger.debug( - f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, " - f"Output Answer: {answer}" + f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input" + f" Question: {question}, Output Answer: {answer}" ) return answer diff --git a/swarms/tools/exit_conversation.py b/swarms/tools/exit_conversation.py deleted file mode 100644 index d1543e14..00000000 --- a/swarms/tools/exit_conversation.py +++ /dev/null @@ -1,22 +0,0 @@ -from langchain.tools import tool - -from swarms.tools.base import BaseToolSet, SessionGetter, ToolScope -from swarms.utils.logger import logger - - -class ExitConversation(BaseToolSet): - @tool( - name="Exit Conversation", - description="A tool to exit the conversation. " - "Use this when you want to exit the conversation. 
" - "The input should be a message that the conversation is over.", - scope=ToolScope.SESSION, - ) - def exit(self, message: str, get_session: SessionGetter) -> str: - """Run the tool.""" - _, executor = get_session() - del executor - - logger.debug("\nProcessed ExitConversation.") - - return message diff --git a/swarms/tools/interpreter_tool.py b/swarms/tools/interpreter_tool.py deleted file mode 100644 index 22758de6..00000000 --- a/swarms/tools/interpreter_tool.py +++ /dev/null @@ -1,24 +0,0 @@ -import os -import interpreter - - -def compile(task: str): - """ - Open Interpreter lets LLMs run code (Python, Javascript, Shell, and more) locally. You can chat with Open Interpreter through a ChatGPT-like interface in your terminal by running $ interpreter after installing. - - This provides a natural-language interface to your computer's general-purpose capabilities: - - Create and edit photos, videos, PDFs, etc. - Control a Chrome browser to perform research - Plot, clean, and analyze large datasets - ...etc. - ⚠️ Note: You'll be asked to approve code before it's run. - """ - - task = interpreter.chat(task, return_messages=True) - interpreter.chat() - interpreter.reset(task) - - os.environ["INTERPRETER_CLI_AUTO_RUN"] = True - os.environ["INTERPRETER_CLI_FAST_MODE"] = True - os.environ["INTERPRETER_CLI_DEBUG"] = True diff --git a/swarms/tools/mm_models.py b/swarms/tools/mm_models.py index 0b1cd281..58fe11e5 100644 --- a/swarms/tools/mm_models.py +++ b/swarms/tools/mm_models.py @@ -19,13 +19,12 @@ from transformers import ( ) from swarms.prompts.prebuild.multi_modal_prompts import IMAGE_PROMPT -from swarms.tools.base import tool -from swarms.tools.main import BaseToolSet +from swarms.tools.tool import tool from swarms.utils.logger import logger from swarms.utils.main import BaseHandler, get_new_image_name -class MaskFormer(BaseToolSet): +class MaskFormer: def __init__(self, device): print("Initializing MaskFormer to %s" % device) self.device = device @@ -61,7 +60,7 @@ class MaskFormer(BaseToolSet): return image_mask.resize(original_image.size) -class ImageEditing(BaseToolSet): +class ImageEditing: def __init__(self, device): print("Initializing ImageEditing to %s" % device) self.device = device @@ -76,10 +75,12 @@ class ImageEditing(BaseToolSet): @tool( name="Remove Something From The Photo", - description="useful when you want to remove and object or something from the photo " - "from its description or location. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the object need to be removed. ", + description=( + "useful when you want to remove and object or something from the photo " + "from its description or location. " + "The input to this tool should be a comma separated string of two, " + "representing the image_path and the object need to be removed. " + ), ) def inference_remove(self, inputs): image_path, to_be_removed_txt = inputs.split(",") @@ -87,10 +88,12 @@ class ImageEditing(BaseToolSet): @tool( name="Replace Something From The Photo", - description="useful when you want to replace an object from the object description or " - "location with another object from its description. " - "The input to this tool should be a comma separated string of three, " - "representing the image_path, the object to be replaced, the object to be replaced with ", + description=( + "useful when you want to replace an object from the object description or" + " location with another object from its description. 
The input to this tool" + " should be a comma separated string of three, representing the image_path," + " the object to be replaced, the object to be replaced with " + ), ) def inference_replace(self, inputs): image_path, to_be_replaced_txt, replace_with_txt = inputs.split(",") @@ -109,14 +112,15 @@ class ImageEditing(BaseToolSet): updated_image.save(updated_image_path) logger.debug( - f"\nProcessed ImageEditing, Input Image: {image_path}, Replace {to_be_replaced_txt} to {replace_with_txt}, " - f"Output Image: {updated_image_path}" + f"\nProcessed ImageEditing, Input Image: {image_path}, Replace" + f" {to_be_replaced_txt} to {replace_with_txt}, Output Image:" + f" {updated_image_path}" ) return updated_image_path -class InstructPix2Pix(BaseToolSet): +class InstructPix2Pix: def __init__(self, device): print("Initializing InstructPix2Pix to %s" % device) self.device = device @@ -132,10 +136,12 @@ class InstructPix2Pix(BaseToolSet): @tool( name="Instruct Image Using Text", - description="useful when you want to the style of the image to be like the text. " - "like: make it look like a painting. or make it like a robot. " - "The input to this tool should be a comma separated string of two, " - "representing the image_path and the text. ", + description=( + "useful when you want to the style of the image to be like the text. " + "like: make it look like a painting. or make it like a robot. " + "The input to this tool should be a comma separated string of two, " + "representing the image_path and the text. " + ), ) def inference(self, inputs): """Change style of image.""" @@ -149,14 +155,14 @@ class InstructPix2Pix(BaseToolSet): image.save(updated_image_path) logger.debug( - f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text: {text}, " - f"Output Image: {updated_image_path}" + f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text:" + f" {text}, Output Image: {updated_image_path}" ) return updated_image_path -class Text2Image(BaseToolSet): +class Text2Image: def __init__(self, device): print("Initializing Text2Image to %s" % device) self.device = device @@ -173,9 +179,12 @@ class Text2Image(BaseToolSet): @tool( name="Generate Image From User Input Text", - description="useful when you want to generate an image from a user input text and save it to a file. " - "like: generate an image of an object or something, or generate an image that includes some objects. " - "The input to this tool should be a string, representing the text used to generate image. ", + description=( + "useful when you want to generate an image from a user input text and save" + " it to a file. like: generate an image of an object or something, or" + " generate an image that includes some objects. The input to this tool" + " should be a string, representing the text used to generate image. 
" + ), ) def inference(self, text): image_filename = os.path.join("image", str(uuid.uuid4())[0:8] + ".png") @@ -184,13 +193,14 @@ class Text2Image(BaseToolSet): image.save(image_filename) logger.debug( - f"\nProcessed Text2Image, Input Text: {text}, Output Image: {image_filename}" + f"\nProcessed Text2Image, Input Text: {text}, Output Image:" + f" {image_filename}" ) return image_filename -class VisualQuestionAnswering(BaseToolSet): +class VisualQuestionAnswering: def __init__(self, device): print("Initializing VisualQuestionAnswering to %s" % device) self.torch_dtype = torch.float16 if "cuda" in device else torch.float32 @@ -202,9 +212,12 @@ class VisualQuestionAnswering(BaseToolSet): @tool( name="Answer Question About The Image", - description="useful when you need an answer for a question based on an image. " - "like: what is the background color of the last image, how many cats in this figure, what is in this figure. " - "The input to this tool should be a comma separated string of two, representing the image_path and the question", + description=( + "useful when you need an answer for a question based on an image. like:" + " what is the background color of the last image, how many cats in this" + " figure, what is in this figure. The input to this tool should be a comma" + " separated string of two, representing the image_path and the question" + ), ) def inference(self, inputs): image_path, question = inputs.split(",") @@ -216,8 +229,8 @@ class VisualQuestionAnswering(BaseToolSet): answer = self.processor.decode(out[0], skip_special_tokens=True) logger.debug( - f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input Question: {question}, " - f"Output Answer: {answer}" + f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input" + f" Question: {question}, Output Answer: {answer}" ) return answer @@ -251,7 +264,8 @@ class ImageCaptioning(BaseHandler): out = self.model.generate(**inputs) description = self.processor.decode(out[0], skip_special_tokens=True) print( - f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text: {description}" + f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text:" + f" {description}" ) return IMAGE_PROMPT.format(filename=filename, description=description) diff --git a/swarms/tools/requests.py b/swarms/tools/requests.py deleted file mode 100644 index fa60e8e4..00000000 --- a/swarms/tools/requests.py +++ /dev/null @@ -1,36 +0,0 @@ -import requests -from bs4 import BeautifulSoup - -from swarms.tools.base import BaseToolSet, tool -from swarms.utils.logger import logger - - -class RequestsGet(BaseToolSet): - @tool( - name="Requests Get", - description="A portal to the internet. " - "Use this when you need to get specific content from a website." - "Input should be a url (i.e. https://www.google.com)." - "The output will be the text response of the GET request.", - ) - def get(self, url: str) -> str: - """Run the tool.""" - html = requests.get(url).text - soup = BeautifulSoup(html) - non_readable_tags = soup.find_all( - ["script", "style", "header", "footer", "form"] - ) - - for non_readable_tag in non_readable_tags: - non_readable_tag.extract() - - content = soup.get_text("\n", strip=True) - - if len(content) > 300: - content = content[:300] + "..." 
- - logger.debug( - f"\nProcessed RequestsGet, Input Url: {url} " f"Output Contents: {content}" - ) - - return content diff --git a/swarms/tools/tool.py b/swarms/tools/tool.py index 8f01ac0d..f7e85204 100644 --- a/swarms/tools/tool.py +++ b/swarms/tools/tool.py @@ -121,10 +121,10 @@ class ChildTool(BaseTool): name = cls.__name__ raise SchemaAnnotationError( f"Tool definition for {name} must include valid type annotations" - f" for argument 'args_schema' to behave as expected.\n" - f"Expected annotation of 'Type[BaseModel]'" + " for argument 'args_schema' to behave as expected.\n" + "Expected annotation of 'Type[BaseModel]'" f" but got '{args_schema_type}'.\n" - f"Expected class looks like:\n" + "Expected class looks like:\n" f"{typehint_mandate}" ) @@ -353,7 +353,7 @@ class ChildTool(BaseTool): observation = self.handle_tool_error(e) else: raise ValueError( - f"Got unexpected type of `handle_tool_error`. Expected bool, str " + "Got unexpected type of `handle_tool_error`. Expected bool, str " f"or callable. Received: {self.handle_tool_error}" ) run_manager.on_tool_end( @@ -428,7 +428,7 @@ class ChildTool(BaseTool): observation = self.handle_tool_error(e) else: raise ValueError( - f"Got unexpected type of `handle_tool_error`. Expected bool, str " + "Got unexpected type of `handle_tool_error`. Expected bool, str " f"or callable. Received: {self.handle_tool_error}" ) await run_manager.on_tool_end( @@ -459,7 +459,6 @@ class Tool(BaseTool): """The asynchronous version of the function.""" # --- Runnable --- - async def ainvoke( self, input: Union[str, Dict], @@ -492,8 +491,7 @@ class Tool(BaseTool): all_args = list(args) + list(kwargs.values()) if len(all_args) != 1: raise ToolException( - f"Too many arguments to single-input tool {self.name}." - f" Args: {all_args}" + f"Too many arguments to single-input tool {self.name}. Args: {all_args}" ) return tuple(all_args), {} @@ -590,7 +588,6 @@ class StructuredTool(BaseTool): """The asynchronous version of the function.""" # --- Runnable --- - async def ainvoke( self, input: Union[str, Dict], diff --git a/swarms/utils/code_interpreter.py b/swarms/utils/code_interpreter.py index cf557385..80eb6700 100644 --- a/swarms/utils/code_interpreter.py +++ b/swarms/utils/code_interpreter.py @@ -25,6 +25,15 @@ class SubprocessCodeInterpreter(BaseCodeInterpreter): SubprocessCodeinterpreter is a base class for code interpreters that run code in a subprocess. + Attributes: + start_cmd (str): The command to start the subprocess. Should be a string that can be split by spaces. + process (subprocess.Popen): The subprocess that is running the code. + debug_mode (bool): Whether to print debug statements. + output_queue (queue.Queue): A queue that is filled with output from the subprocess. + done (threading.Event): An event that is set when the subprocess is done running code. + + Example: + >>> from swarms.utils.code_interpreter import SubprocessCodeInterpreter """ @@ -89,7 +98,7 @@ class SubprocessCodeInterpreter(BaseCodeInterpreter): code = self.preprocess_code(code) if not self.process: self.start_process() - except: + except BaseException: yield {"output": traceback.format_exc()} return @@ -103,7 +112,7 @@ class SubprocessCodeInterpreter(BaseCodeInterpreter): self.process.stdin.write(code + "\n") self.process.stdin.flush() break - except: + except BaseException: if retry_count != 0: # For UX, I like to hide this if it happens once. 
Obviously feels better to not see errors # Most of the time it doesn't matter, but we should figure out why it happens frequently with: diff --git a/swarms/utils/main.py b/swarms/utils/main.py index 3fa4b2ea..63cb0e4a 100644 --- a/swarms/utils/main.py +++ b/swarms/utils/main.py @@ -88,7 +88,6 @@ def get_new_dataframe_name(org_img_name, func_name="update"): # =======================> utils end - # =======================> ANSI BEGINNING @@ -208,10 +207,8 @@ def dim_multiline(message: str) -> str: # +=============================> ANSI Ending - # ================================> upload base - STATIC_DIR = "static" @@ -227,7 +224,6 @@ class AbstractUploader(ABC): # ================================> upload end - # ========================= upload s3 @@ -263,7 +259,6 @@ class S3Uploader(AbstractUploader): # ========================= upload s3 - # ========================> upload/static @@ -292,7 +287,6 @@ class StaticUploader(AbstractUploader): # ========================> handlers/base - # from env import settings @@ -383,7 +377,7 @@ class FileHandler: if FileType.from_url(url) == FileType.IMAGE: raise Exception( f"No handler for {FileType.from_url(url)}. " - f"Please set USE_GPU to True in env/settings.py" + "Please set USE_GPU to True in env/settings.py" ) else: raise Exception(f"No handler for {FileType.from_url(url)}") @@ -394,7 +388,6 @@ class FileHandler: # => base end - # ===========================> @@ -408,7 +401,8 @@ class CsvToDataframe(BaseHandler): ) print( - f"\nProcessed CsvToDataframe, Input CSV: {filename}, Output Description: {description}" + f"\nProcessed CsvToDataframe, Input CSV: {filename}, Output Description:" + f" {description}" ) return DATAFRAME_PROMPT.format(filename=filename, description=description) diff --git a/swarms/workers/__init__.py b/swarms/workers/__init__.py index 2a7cc4f1..9dabe94d 100644 --- a/swarms/workers/__init__.py +++ b/swarms/workers/__init__.py @@ -1,2 +1,2 @@ -from swarms.workers.worker import Worker +# from swarms.workers.worker import Worker from swarms.workers.base import AbstractWorker diff --git a/swarms/workers/worker.py b/swarms/workers/worker.py index be422ff2..9986666a 100644 --- a/swarms/workers/worker.py +++ b/swarms/workers/worker.py @@ -163,7 +163,8 @@ class Worker: except Exception as error: raise RuntimeError( - f"Error setting up memory perhaps try try tuning the embedding size: {error}" + "Error setting up memory perhaps try try tuning the embedding size:" + f" {error}" ) def setup_agent(self): diff --git a/tests/chunkers/basechunker.py b/tests/chunkers/basechunker.py index f70705bc..4fd92da1 100644 --- a/tests/chunkers/basechunker.py +++ b/tests/chunkers/basechunker.py @@ -3,7 +3,7 @@ from swarms.chunkers.base import ( BaseChunker, TextArtifact, ChunkSeparator, - OpenAiTokenizer, + OpenAITokenizer, ) # adjust the import paths accordingly @@ -21,7 +21,7 @@ def test_default_separators(): def test_default_tokenizer(): chunker = BaseChunker() - assert isinstance(chunker.tokenizer, OpenAiTokenizer) + assert isinstance(chunker.tokenizer, OpenAITokenizer) # 2. 
Test Basic Chunking diff --git a/tests/models/ada.py b/tests/models/ada.py index 786b162d..11139929 100644 --- a/tests/models/ada.py +++ b/tests/models/ada.py @@ -3,7 +3,9 @@ import pytest import openai from unittest.mock import patch -from swarms.models.simple_ada import get_ada_embeddings # Adjust this import path to your project structure +from swarms.models.simple_ada import ( + get_ada_embeddings, +) # Adjust this import path to your project structure from os import getenv from dotenv import load_dotenv @@ -18,20 +20,24 @@ def test_texts(): "A quick brown fox jumps over the lazy dog", ] + # Basic Test def test_get_ada_embeddings_basic(test_texts): - with patch('openai.Embedding.create') as mock_create: + with patch("openai.Embedding.create") as mock_create: # Mocking the OpenAI API call - mock_create.return_value = { - "data": [ - {"embedding": [0.1, 0.2, 0.3]} - ] - } - + mock_create.return_value = {"data": [{"embedding": [0.1, 0.2, 0.3]}]} + for text in test_texts: embedding = get_ada_embeddings(text) - assert embedding == [0.1, 0.2, 0.3], "Embedding does not match expected output" - mock_create.assert_called_with(input=[text.replace("\n", " ")], model="text-embedding-ada-002") + assert embedding == [ + 0.1, + 0.2, + 0.3, + ], "Embedding does not match expected output" + mock_create.assert_called_with( + input=[text.replace("\n", " ")], model="text-embedding-ada-002" + ) + # Parameterized Test @pytest.mark.parametrize( @@ -42,27 +48,28 @@ def test_get_ada_embeddings_basic(test_texts): ], ) def test_get_ada_embeddings_models(text, model, expected_call_model): - with patch('openai.Embedding.create') as mock_create: - mock_create.return_value = { - "data": [ - {"embedding": [0.1, 0.2, 0.3]} - ] - } + with patch("openai.Embedding.create") as mock_create: + mock_create.return_value = {"data": [{"embedding": [0.1, 0.2, 0.3]}]} _ = get_ada_embeddings(text, model=model) mock_create.assert_called_with(input=[text], model=expected_call_model) + # Exception Test def test_get_ada_embeddings_exception(): - with patch('openai.Embedding.create') as mock_create: + with patch("openai.Embedding.create") as mock_create: mock_create.side_effect = openai.error.OpenAIError("Test error") with pytest.raises(openai.error.OpenAIError): get_ada_embeddings("Some text") + # Tests for environment variable loading def test_env_var_loading(monkeypatch): monkeypatch.setenv("OPENAI_API_KEY", "testkey123") - with patch('openai.Embedding.create'): - assert getenv("OPENAI_API_KEY") == "testkey123", "Environment variable for API key is not set correctly" + with patch("openai.Embedding.create"): + assert ( + getenv("OPENAI_API_KEY") == "testkey123" + ), "Environment variable for API key is not set correctly" + # ... more tests to cover other aspects such as different input types, large inputs, invalid inputs, etc. 
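The reworked embedding tests above all reduce to one patch-and-assert pattern around the OpenAI client call. A minimal standalone sketch of that pattern, assuming (as the tests above do) that `get_ada_embeddings` routes through `openai.Embedding.create` and returns the first embedding from the response:

```python
# Minimal sketch of the mocking pattern used by the embedding tests above.
# Assumes get_ada_embeddings(text) calls openai.Embedding.create internally,
# as the surrounding test suite does; no real API key or network is needed.
from unittest.mock import patch

from swarms.models.simple_ada import get_ada_embeddings


def test_embedding_is_unwrapped_from_response():
    with patch("openai.Embedding.create") as mock_create:
        # Fake the API response shape: {"data": [{"embedding": [...]}]}
        mock_create.return_value = {"data": [{"embedding": [0.1, 0.2, 0.3]}]}

        # The helper should return the embedding list from the mocked payload.
        assert get_ada_embeddings("hello world") == [0.1, 0.2, 0.3]
        mock_create.assert_called_once()
```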
diff --git a/tests/models/anthropic.py b/tests/models/anthropic.py new file mode 100644 index 00000000..4dbd365d --- /dev/null +++ b/tests/models/anthropic.py @@ -0,0 +1,127 @@ +import os +import pytest +from unittest.mock import Mock, patch +from swarms.models.anthropic import Anthropic + + +@pytest.fixture +def mock_anthropic_env(): + os.environ["ANTHROPIC_API_URL"] = "https://test.anthropic.com" + os.environ["ANTHROPIC_API_KEY"] = "test_api_key" + yield + del os.environ["ANTHROPIC_API_URL"] + del os.environ["ANTHROPIC_API_KEY"] + + +@pytest.fixture +def mock_requests_post(): + with patch("requests.post") as mock_post: + yield mock_post + + +@pytest.fixture +def anthropic_instance(): + return Anthropic(model="test-model") + + +def test_anthropic_init_default_values(anthropic_instance): + assert anthropic_instance.model == "test-model" + assert anthropic_instance.max_tokens_to_sample == 256 + assert anthropic_instance.temperature is None + assert anthropic_instance.top_k is None + assert anthropic_instance.top_p is None + assert anthropic_instance.streaming is False + assert anthropic_instance.default_request_timeout == 600 + assert anthropic_instance.anthropic_api_url == "https://test.anthropic.com" + assert anthropic_instance.anthropic_api_key == "test_api_key" + + +def test_anthropic_init_custom_values(): + anthropic_instance = Anthropic( + model="custom-model", + max_tokens_to_sample=128, + temperature=0.8, + top_k=5, + top_p=0.9, + streaming=True, + default_request_timeout=300, + ) + assert anthropic_instance.model == "custom-model" + assert anthropic_instance.max_tokens_to_sample == 128 + assert anthropic_instance.temperature == 0.8 + assert anthropic_instance.top_k == 5 + assert anthropic_instance.top_p == 0.9 + assert anthropic_instance.streaming is True + assert anthropic_instance.default_request_timeout == 300 + + +def test_anthropic_default_params(anthropic_instance): + default_params = anthropic_instance._default_params() + assert default_params == { + "max_tokens_to_sample": 256, + "model": "test-model", + } + + +def test_anthropic_run(mock_anthropic_env, mock_requests_post, anthropic_instance): + mock_response = Mock() + mock_response.json.return_value = {"completion": "Generated text"} + mock_requests_post.return_value = mock_response + + task = "Generate text" + stop = ["stop1", "stop2"] + + completion = anthropic_instance.run(task, stop) + + assert completion == "Generated text" + mock_requests_post.assert_called_once_with( + "https://test.anthropic.com/completions", + headers={"Authorization": "Bearer test_api_key"}, + json={ + "prompt": task, + "stop_sequences": stop, + "max_tokens_to_sample": 256, + "model": "test-model", + }, + timeout=600, + ) + + +def test_anthropic_call(mock_anthropic_env, mock_requests_post, anthropic_instance): + mock_response = Mock() + mock_response.json.return_value = {"completion": "Generated text"} + mock_requests_post.return_value = mock_response + + task = "Generate text" + stop = ["stop1", "stop2"] + + completion = anthropic_instance(task, stop) + + assert completion == "Generated text" + mock_requests_post.assert_called_once_with( + "https://test.anthropic.com/completions", + headers={"Authorization": "Bearer test_api_key"}, + json={ + "prompt": task, + "stop_sequences": stop, + "max_tokens_to_sample": 256, + "model": "test-model", + }, + timeout=600, + ) + + +def test_anthropic_exception_handling( + mock_anthropic_env, mock_requests_post, anthropic_instance +): + mock_response = Mock() + mock_response.json.return_value = {"error": "An 
error occurred"} + mock_requests_post.return_value = mock_response + + task = "Generate text" + stop = ["stop1", "stop2"] + + with pytest.raises(Exception) as excinfo: + anthropic_instance(task, stop) + + assert "An error occurred" in str(excinfo.value) diff --git a/tests/models/dalle3.py b/tests/models/dalle3.py new file mode 100644 index 00000000..f9a2f8cf --- /dev/null +++ b/tests/models/dalle3.py @@ -0,0 +1,410 @@ +import os +from unittest.mock import Mock + +import pytest +from openai import OpenAIError +from PIL import Image +from termcolor import colored + +from playground.models.dalle3 import Dalle3 + + +# Mocking the OpenAI client to avoid making actual API calls during testing +@pytest.fixture +def mock_openai_client(): + return Mock() + + +@pytest.fixture +def dalle3(mock_openai_client): + return Dalle3(client=mock_openai_client) + + +def test_dalle3_call_success(dalle3, mock_openai_client): + # Arrange + task = "A painting of a dog" + expected_img_url = ( + "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + ) + mock_openai_client.images.generate.return_value = Mock( + data=[Mock(url=expected_img_url)] + ) + + # Act + img_url = dalle3(task) + + # Assert + assert img_url == expected_img_url + mock_openai_client.images.generate.assert_called_once_with(prompt=task, n=4) + + +def test_dalle3_call_failure(dalle3, mock_openai_client, capsys): + # Arrange + task = "Invalid task" + expected_error_message = "Error running Dalle3: API Error" + + # Mocking OpenAIError + mock_openai_client.images.generate.side_effect = OpenAIError( + expected_error_message, http_status=500, error="Internal Server Error" + ) + + # Act and assert + with pytest.raises(OpenAIError) as excinfo: + dalle3(task) + + assert str(excinfo.value) == expected_error_message + mock_openai_client.images.generate.assert_called_once_with(prompt=task, n=4) + + # Ensure the error message is printed in red + captured = capsys.readouterr() + assert colored(expected_error_message, "red") in captured.out + + +def test_dalle3_create_variations_success(dalle3, mock_openai_client): + # Arrange + img_url = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + expected_variation_url = ( + "https://cdn.openai.com/dall-e/encoded/feats/feats_02ABCDE.png" + ) + mock_openai_client.images.create_variation.return_value = Mock( + data=[Mock(url=expected_variation_url)] + ) + + # Act + variation_img_url = dalle3.create_variations(img_url) + + # Assert + assert variation_img_url == expected_variation_url + mock_openai_client.images.create_variation.assert_called_once() + _, kwargs = mock_openai_client.images.create_variation.call_args + assert kwargs["img"] is not None + assert kwargs["n"] == 4 + assert kwargs["size"] == "1024x1024" + + +def test_dalle3_create_variations_failure(dalle3, mock_openai_client, capsys): + # Arrange + img_url = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + expected_error_message = "Error running Dalle3: API Error" + + # Mocking OpenAIError + mock_openai_client.images.create_variation.side_effect = OpenAIError( + expected_error_message, http_status=500, error="Internal Server Error" + ) + + # Act and assert + with pytest.raises(OpenAIError) as excinfo: + dalle3.create_variations(img_url) + + assert str(excinfo.value) == expected_error_message + mock_openai_client.images.create_variation.assert_called_once() + + # Ensure the error message is printed in red + captured = capsys.readouterr() + assert colored(expected_error_message, "red") in captured.out + + 
+def test_dalle3_read_img(): + # Arrange + img_path = "test_image.png" + img = Image.new("RGB", (512, 512)) + + # Save the image temporarily + img.save(img_path) + + # Act + dalle3 = Dalle3() + img_loaded = dalle3.read_img(img_path) + + # Assert + assert isinstance(img_loaded, Image.Image) + + # Clean up + os.remove(img_path) + + +def test_dalle3_set_width_height(): + # Arrange + img = Image.new("RGB", (512, 512)) + width = 256 + height = 256 + + # Act + dalle3 = Dalle3() + img_resized = dalle3.set_width_height(img, width, height) + + # Assert + assert img_resized.size == (width, height) + + +def test_dalle3_convert_to_bytesio(): + # Arrange + img = Image.new("RGB", (512, 512)) + expected_format = "PNG" + + # Act + dalle3 = Dalle3() + img_bytes = dalle3.convert_to_bytesio(img, format=expected_format) + + # Assert + assert isinstance(img_bytes, bytes) + assert img_bytes.startswith(b"\x89PNG") + + +def test_dalle3_call_multiple_times(dalle3, mock_openai_client): + # Arrange + task = "A painting of a dog" + expected_img_url = ( + "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + ) + mock_openai_client.images.generate.return_value = Mock( + data=[Mock(url=expected_img_url)] + ) + + # Act + img_url1 = dalle3(task) + img_url2 = dalle3(task) + + # Assert + assert img_url1 == expected_img_url + assert img_url2 == expected_img_url + assert mock_openai_client.images.generate.call_count == 2 + + +def test_dalle3_call_with_large_input(dalle3, mock_openai_client): + # Arrange + task = "A" * 2048 # Input longer than API's limit + expected_error_message = "Error running Dalle3: API Error" + mock_openai_client.images.generate.side_effect = OpenAIError( + expected_error_message, http_status=500, error="Internal Server Error" + ) + + # Act and assert + with pytest.raises(OpenAIError) as excinfo: + dalle3(task) + + assert str(excinfo.value) == expected_error_message + + +def test_dalle3_create_variations_with_invalid_image_url(dalle3, mock_openai_client): + # Arrange + img_url = "https://invalid-image-url.com" + expected_error_message = "Error running Dalle3: Invalid image URL" + + # Act and assert + with pytest.raises(ValueError) as excinfo: + dalle3.create_variations(img_url) + + assert str(excinfo.value) == expected_error_message + + +def test_dalle3_set_width_height_invalid_dimensions(dalle3): + # Arrange + img = dalle3.read_img("test_image.png") + width = 0 + height = -1 + + # Act and assert + with pytest.raises(ValueError): + dalle3.set_width_height(img, width, height) + + +def test_dalle3_convert_to_bytesio_invalid_format(dalle3): + # Arrange + img = dalle3.read_img("test_image.png") + invalid_format = "invalid_format" + + # Act and assert + with pytest.raises(ValueError): + dalle3.convert_to_bytesio(img, format=invalid_format) + + +def test_dalle3_call_with_retry(dalle3, mock_openai_client): + # Arrange + task = "A painting of a dog" + expected_img_url = ( + "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + ) + + # Simulate a retry scenario + mock_openai_client.images.generate.side_effect = [ + OpenAIError("Temporary error", http_status=500, error="Internal Server Error"), + Mock(data=[Mock(url=expected_img_url)]), + ] + + # Act + img_url = dalle3(task) + + # Assert + assert img_url == expected_img_url + assert mock_openai_client.images.generate.call_count == 2 + + +def test_dalle3_create_variations_with_retry(dalle3, mock_openai_client): + # Arrange + img_url = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + expected_variation_url = 
( + "https://cdn.openai.com/dall-e/encoded/feats/feats_02ABCDE.png" + ) + + # Simulate a retry scenario + mock_openai_client.images.create_variation.side_effect = [ + OpenAIError("Temporary error", http_status=500, error="Internal Server Error"), + Mock(data=[Mock(url=expected_variation_url)]), + ] + + # Act + variation_img_url = dalle3.create_variations(img_url) + + # Assert + assert variation_img_url == expected_variation_url + assert mock_openai_client.images.create_variation.call_count == 2 + + +def test_dalle3_call_exception_logging(dalle3, mock_openai_client, capsys): + # Arrange + task = "A painting of a dog" + expected_error_message = "Error running Dalle3: API Error" + + # Mocking OpenAIError + mock_openai_client.images.generate.side_effect = OpenAIError( + expected_error_message, http_status=500, error="Internal Server Error" + ) + + # Act + with pytest.raises(OpenAIError): + dalle3(task) + + # Assert that the error message is logged + captured = capsys.readouterr() + assert expected_error_message in captured.err + + +def test_dalle3_create_variations_exception_logging(dalle3, mock_openai_client, capsys): + # Arrange + img_url = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + expected_error_message = "Error running Dalle3: API Error" + + # Mocking OpenAIError + mock_openai_client.images.create_variation.side_effect = OpenAIError( + expected_error_message, http_status=500, error="Internal Server Error" + ) + + # Act + with pytest.raises(OpenAIError): + dalle3.create_variations(img_url) + + # Assert that the error message is logged + captured = capsys.readouterr() + assert expected_error_message in captured.err + + +def test_dalle3_read_img_invalid_path(dalle3): + # Arrange + invalid_img_path = "invalid_image_path.png" + + # Act and assert + with pytest.raises(FileNotFoundError): + dalle3.read_img(invalid_img_path) + + +def test_dalle3_call_no_api_key(): + # Arrange + task = "A painting of a dog" + dalle3 = Dalle3(api_key=None) + expected_error_message = "Error running Dalle3: API Key is missing" + + # Act and assert + with pytest.raises(ValueError) as excinfo: + dalle3(task) + + assert str(excinfo.value) == expected_error_message + + +def test_dalle3_create_variations_no_api_key(): + # Arrange + img_url = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + dalle3 = Dalle3(api_key=None) + expected_error_message = "Error running Dalle3: API Key is missing" + + # Act and assert + with pytest.raises(ValueError) as excinfo: + dalle3.create_variations(img_url) + + assert str(excinfo.value) == expected_error_message + + +def test_dalle3_call_with_retry_max_retries_exceeded(dalle3, mock_openai_client): + # Arrange + task = "A painting of a dog" + + # Simulate max retries exceeded + mock_openai_client.images.generate.side_effect = OpenAIError( + "Temporary error", http_status=500, error="Internal Server Error" + ) + + # Act and assert + with pytest.raises(OpenAIError) as excinfo: + dalle3(task) + + assert "Retry limit exceeded" in str(excinfo.value) + + +def test_dalle3_create_variations_with_retry_max_retries_exceeded( + dalle3, mock_openai_client +): + # Arrange + img_url = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + + # Simulate max retries exceeded + mock_openai_client.images.create_variation.side_effect = OpenAIError( + "Temporary error", http_status=500, error="Internal Server Error" + ) + + # Act and assert + with pytest.raises(OpenAIError) as excinfo: + dalle3.create_variations(img_url) + + assert "Retry limit 
exceeded" in str(excinfo.value) + + +def test_dalle3_call_retry_with_success(dalle3, mock_openai_client): + # Arrange + task = "A painting of a dog" + expected_img_url = ( + "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + ) + + # Simulate success after a retry + mock_openai_client.images.generate.side_effect = [ + OpenAIError("Temporary error", http_status=500, error="Internal Server Error"), + Mock(data=[Mock(url=expected_img_url)]), + ] + + # Act + img_url = dalle3(task) + + # Assert + assert img_url == expected_img_url + assert mock_openai_client.images.generate.call_count == 2 + + +def test_dalle3_create_variations_retry_with_success(dalle3, mock_openai_client): + # Arrange + img_url = "https://cdn.openai.com/dall-e/encoded/feats/feats_01J9J5ZKJZJY9.png" + expected_variation_url = ( + "https://cdn.openai.com/dall-e/encoded/feats/feats_02ABCDE.png" + ) + + # Simulate success after a retry + mock_openai_client.images.create_variation.side_effect = [ + OpenAIError("Temporary error", http_status=500, error="Internal Server Error"), + Mock(data=[Mock(url=expected_variation_url)]), + ] + + # Act + variation_img_url = dalle3.create_variations(img_url) + + # Assert + assert variation_img_url == expected_variation_url + assert mock_openai_client.images.create_variation.call_count == 2 diff --git a/tests/models/distilled_whisperx.py b/tests/models/distilled_whisperx.py new file mode 100644 index 00000000..4bdd10f3 --- /dev/null +++ b/tests/models/distilled_whisperx.py @@ -0,0 +1,119 @@ +# test_distilled_whisperx.py + +from unittest.mock import AsyncMock, MagicMock + +import pytest +import torch +from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor + +from swarms.models.distilled_whisperx import DistilWhisperModel, async_retry + + +# Fixtures for setting up model, processor, and audio files +@pytest.fixture(scope="module") +def model_id(): + return "distil-whisper/distil-large-v2" + + +@pytest.fixture(scope="module") +def whisper_model(model_id): + return DistilWhisperModel(model_id) + + +@pytest.fixture(scope="session") +def audio_file_path(tmp_path_factory): + # You would create a small temporary MP3 file here for testing + # or use a public domain MP3 file's path + return "path/to/valid_audio.mp3" + + +@pytest.fixture(scope="session") +def invalid_audio_file_path(): + return "path/to/invalid_audio.mp3" + + +@pytest.fixture(scope="session") +def audio_dict(): + # This should represent a valid audio dictionary as expected by the model + return {"array": torch.randn(1, 16000), "sampling_rate": 16000} + + +# Test initialization +def test_initialization(whisper_model): + assert whisper_model.model is not None + assert whisper_model.processor is not None + + +# Test successful transcription with file path +def test_transcribe_with_file_path(whisper_model, audio_file_path): + transcription = whisper_model.transcribe(audio_file_path) + assert isinstance(transcription, str) + + +# Test successful transcription with audio dict +def test_transcribe_with_audio_dict(whisper_model, audio_dict): + transcription = whisper_model.transcribe(audio_dict) + assert isinstance(transcription, str) + + +# Test for file not found error +def test_file_not_found(whisper_model, invalid_audio_file_path): + with pytest.raises(Exception): + whisper_model.transcribe(invalid_audio_file_path) + + +# Asynchronous tests +@pytest.mark.asyncio +async def test_async_transcription_success(whisper_model, audio_file_path): + transcription = await whisper_model.async_transcribe(audio_file_path) + 
assert isinstance(transcription, str) + + +@pytest.mark.asyncio +async def test_async_transcription_failure(whisper_model, invalid_audio_file_path): + with pytest.raises(Exception): + await whisper_model.async_transcribe(invalid_audio_file_path) + + +# Testing real-time transcription simulation +def test_real_time_transcription(whisper_model, audio_file_path, capsys): + whisper_model.real_time_transcribe(audio_file_path, chunk_duration=1) + captured = capsys.readouterr() + assert "Starting real-time transcription..." in captured.out + + +# Testing retry decorator for asynchronous function +@pytest.mark.asyncio +async def test_async_retry(): + @async_retry(max_retries=2, exceptions=(ValueError,), delay=0) + async def failing_func(): + raise ValueError("Test") + + with pytest.raises(ValueError): + await failing_func() + + +# Mocking the actual model to avoid GPU/CPU intensive operations during test +@pytest.fixture +def mocked_model(monkeypatch): + model_mock = AsyncMock(AutoModelForSpeechSeq2Seq) + processor_mock = MagicMock(AutoProcessor) + monkeypatch.setattr( + "swarms.models.distilled_whisperx.AutoModelForSpeechSeq2Seq.from_pretrained", + model_mock, + ) + monkeypatch.setattr( + "swarms.models.distilled_whisperx.AutoProcessor.from_pretrained", processor_mock + ) + return model_mock, processor_mock + + +@pytest.mark.asyncio +async def test_async_transcribe_with_mocked_model(mocked_model, audio_file_path): + model_mock, processor_mock = mocked_model + # Set up what the mock should return when it's called + model_mock.return_value.generate.return_value = torch.tensor([[0]]) + processor_mock.return_value.batch_decode.return_value = ["mocked transcription"] + model_wrapper = DistilWhisperModel() + transcription = await model_wrapper.async_transcribe(audio_file_path) + assert transcription == "mocked transcription" diff --git a/tests/models/gpt4v.py b/tests/models/gpt4v.py new file mode 100644 index 00000000..23e97d03 --- /dev/null +++ b/tests/models/gpt4v.py @@ -0,0 +1,386 @@ +import logging +import os +from unittest.mock import Mock + +import pytest +from dotenv import load_dotenv +from requests.exceptions import ConnectionError, HTTPError, RequestException, Timeout + +from swarms.models.gpt4v import GPT4Vision, GPT4VisionResponse + +load_dotenv + +api_key = os.getenv("OPENAI_API_KEY") + + +# Mock the OpenAI client +@pytest.fixture +def mock_openai_client(): + return Mock() + + +@pytest.fixture +def gpt4vision(mock_openai_client): + return GPT4Vision(client=mock_openai_client) + + +def test_gpt4vision_default_values(): + # Arrange and Act + gpt4vision = GPT4Vision() + + # Assert + assert gpt4vision.max_retries == 3 + assert gpt4vision.model == "gpt-4-vision-preview" + assert gpt4vision.backoff_factor == 2.0 + assert gpt4vision.timeout_seconds == 10 + assert gpt4vision.api_key is None + assert gpt4vision.quality == "low" + assert gpt4vision.max_tokens == 200 + + +def test_gpt4vision_api_key_from_env_variable(): + # Arrange + api_key = os.environ["OPENAI_API_KEY"] + + # Act + gpt4vision = GPT4Vision() + + # Assert + assert gpt4vision.api_key == api_key + + +def test_gpt4vision_set_api_key(): + # Arrange + gpt4vision = GPT4Vision(api_key=api_key) + + # Assert + assert gpt4vision.api_key == api_key + + +def test_gpt4vision_invalid_max_retries(): + # Arrange and Act + with pytest.raises(ValueError): + GPT4Vision(max_retries=-1) + + +def test_gpt4vision_invalid_backoff_factor(): + # Arrange and Act + with pytest.raises(ValueError): + GPT4Vision(backoff_factor=-1) + + +def 
test_gpt4vision_invalid_timeout_seconds(): + # Arrange and Act + with pytest.raises(ValueError): + GPT4Vision(timeout_seconds=-1) + + +def test_gpt4vision_invalid_max_tokens(): + # Arrange and Act + with pytest.raises(ValueError): + GPT4Vision(max_tokens=-1) + + +def test_gpt4vision_logger_initialized(): + # Arrange + gpt4vision = GPT4Vision() + + # Assert + assert isinstance(gpt4vision.logger, logging.Logger) + + +def test_gpt4vision_process_img_nonexistent_file(): + # Arrange + gpt4vision = GPT4Vision() + img_path = "nonexistent_image.jpg" + + # Act and Assert + with pytest.raises(FileNotFoundError): + gpt4vision.process_img(img_path) + + +def test_gpt4vision_call_single_task_single_image_no_openai_client(gpt4vision): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + # Act and Assert + with pytest.raises(AttributeError): + gpt4vision(img_url, [task]) + + +def test_gpt4vision_call_single_task_single_image_empty_response( + gpt4vision, mock_openai_client +): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + mock_openai_client.chat.completions.create.return_value.choices = [] + + # Act + response = gpt4vision(img_url, [task]) + + # Assert + assert response.answer == "" + mock_openai_client.chat.completions.create.assert_called_once() + + +def test_gpt4vision_call_multiple_tasks_single_image_empty_responses( + gpt4vision, mock_openai_client +): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + tasks = ["Describe this image.", "What's in this picture?"] + + mock_openai_client.chat.completions.create.return_value.choices = [] + + # Act + responses = gpt4vision(img_url, tasks) + + # Assert + assert all(response.answer == "" for response in responses) + assert ( + mock_openai_client.chat.completions.create.call_count == 1 + ) # Should be called only once + + +def test_gpt4vision_call_single_task_single_image_timeout( + gpt4vision, mock_openai_client +): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + mock_openai_client.chat.completions.create.side_effect = Timeout( + "Request timed out" + ) + + # Act and Assert + with pytest.raises(Timeout): + gpt4vision(img_url, [task]) + + +def test_gpt4vision_call_retry_with_success_after_timeout( + gpt4vision, mock_openai_client +): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + # Simulate success after a timeout and retry + mock_openai_client.chat.completions.create.side_effect = [ + Timeout("Request timed out"), + { + "choices": [ + {"message": {"content": {"text": "A description of the image."}}} + ], + }, + ] + + # Act + response = gpt4vision(img_url, [task]) + + # Assert + assert response.answer == "A description of the image." 
+ assert ( + mock_openai_client.chat.completions.create.call_count == 2 + ) # Should be called twice + + +def test_gpt4vision_process_img(): + # Arrange + img_path = "test_image.jpg" + gpt4vision = GPT4Vision() + + # Act + img_data = gpt4vision.process_img(img_path) + + # Assert + assert img_data.startswith("/9j/") # Base64-encoded image data + + +def test_gpt4vision_call_single_task_single_image(gpt4vision, mock_openai_client): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + expected_response = GPT4VisionResponse(answer="A description of the image.") + + mock_openai_client.chat.completions.create.return_value.choices[ + 0 + ].text = expected_response.answer + + # Act + response = gpt4vision(img_url, [task]) + + # Assert + assert response == expected_response + mock_openai_client.chat.completions.create.assert_called_once() + + +def test_gpt4vision_call_single_task_multiple_images(gpt4vision, mock_openai_client): + # Arrange + img_urls = ["https://example.com/image1.jpg", "https://example.com/image2.jpg"] + task = "Describe these images." + + expected_response = GPT4VisionResponse(answer="Descriptions of the images.") + + mock_openai_client.chat.completions.create.return_value.choices[ + 0 + ].text = expected_response.answer + + # Act + response = gpt4vision(img_urls, [task]) + + # Assert + assert response == expected_response + mock_openai_client.chat.completions.create.assert_called_once() + + +def test_gpt4vision_call_multiple_tasks_single_image(gpt4vision, mock_openai_client): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + tasks = ["Describe this image.", "What's in this picture?"] + + expected_responses = [ + GPT4VisionResponse(answer="A description of the image."), + GPT4VisionResponse(answer="It contains various objects."), + ] + + def create_mock_response(response): + return {"choices": [{"message": {"content": {"text": response.answer}}}]} + + mock_openai_client.chat.completions.create.side_effect = [ + create_mock_response(response) for response in expected_responses + ] + + # Act + responses = gpt4vision(img_url, tasks) + + # Assert + assert responses == expected_responses + assert ( + mock_openai_client.chat.completions.create.call_count == 1 + ) # Should be called only once + + def test_gpt4vision_call_multiple_tasks_single_image( + gpt4vision, mock_openai_client + ): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + tasks = ["Describe this image.", "What's in this picture?"] + + expected_responses = [ + GPT4VisionResponse(answer="A description of the image."), + GPT4VisionResponse(answer="It contains various objects."), + ] + + mock_openai_client.chat.completions.create.side_effect = [ + { + "choices": [ + {"message": {"content": {"text": expected_responses[i].answer}}} + ] + } + for i in range(len(expected_responses)) + ] + + # Act + responses = gpt4vision(img_url, tasks) + + # Assert + assert responses == expected_responses + assert ( + mock_openai_client.chat.completions.create.call_count == 1 + ) # Should be called only once + + +def test_gpt4vision_call_multiple_tasks_multiple_images(gpt4vision, 
mock_openai_client): + # Arrange + img_urls = [ + "https://images.unsplash.com/photo-1694734479857-626882b6db37?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D", + "https://images.unsplash.com/photo-1694734479898-6ac4633158ac?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D", + ] + tasks = ["Describe these images.", "What's in these pictures?"] + + expected_responses = [ + GPT4VisionResponse(answer="Descriptions of the images."), + GPT4VisionResponse(answer="They contain various objects."), + ] + + mock_openai_client.chat.completions.create.side_effect = [ + {"choices": [{"message": {"content": {"text": response.answer}}}]} + for response in expected_responses + ] + + # Act + responses = gpt4vision(img_urls, tasks) + + # Assert + assert responses == expected_responses + assert ( + mock_openai_client.chat.completions.create.call_count == 1 + ) # Should be called only once + + +def test_gpt4vision_call_http_error(gpt4vision, mock_openai_client): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + mock_openai_client.chat.completions.create.side_effect = HTTPError("HTTP Error") + + # Act and Assert + with pytest.raises(HTTPError): + gpt4vision(img_url, [task]) + + +def test_gpt4vision_call_request_error(gpt4vision, mock_openai_client): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + mock_openai_client.chat.completions.create.side_effect = RequestException( + "Request Error" + ) + + # Act and Assert + with pytest.raises(RequestException): + gpt4vision(img_url, [task]) + + +def test_gpt4vision_call_connection_error(gpt4vision, mock_openai_client): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + mock_openai_client.chat.completions.create.side_effect = ConnectionError( + "Connection Error" + ) + + # Act and Assert + with pytest.raises(ConnectionError): + gpt4vision(img_url, [task]) + + +def test_gpt4vision_call_retry_with_success(gpt4vision, mock_openai_client): + # Arrange + img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + task = "Describe this image." + + # Simulate success after a retry + mock_openai_client.chat.completions.create.side_effect = [ + RequestException("Temporary error"), + { + "choices": [{"text": "A description of the image."}] + }, # fixed dictionary syntax + ] + + # Act + response = gpt4vision(img_url, [task]) + + # Assert + assert response.answer == "A description of the image." 
+ assert ( + mock_openai_client.chat.completions.create.call_count == 2 + ) # Should be called twice diff --git a/tests/models/huggingface.py b/tests/models/huggingface.py index 847ced06..7c43145a 100644 --- a/tests/models/huggingface.py +++ b/tests/models/huggingface.py @@ -70,11 +70,14 @@ def test_llm_memory_consumption(llm_instance): # Test different initialization parameters -@pytest.mark.parametrize("model_id, max_length", [ - ("gpt2-small", 100), - ("gpt2-medium", 200), - ("gpt2-large", None) # None to check default behavior -]) +@pytest.mark.parametrize( + "model_id, max_length", + [ + ("gpt2-small", 100), + ("gpt2-medium", 200), + ("gpt2-large", None), # None to check default behavior + ], +) def test_llm_initialization_params(model_id, max_length): if max_length: instance = HuggingfaceLLM(model_id=model_id, max_length=max_length) @@ -161,7 +164,9 @@ def test_llm_response_time(mock_run, llm_instance): start_time = time.time() llm_instance.run("test task for response time") end_time = time.time() - assert end_time - start_time < 1 # Assuming the response should be faster than 1 second + assert ( + end_time - start_time < 1 + ) # Assuming the response should be faster than 1 second # Test the logging of a warning for long inputs @@ -173,7 +178,9 @@ def test_llm_long_input_warning(mock_warning, llm_instance): # Test for run method behavior when model raises an exception -@patch("swarms.models.huggingface.HuggingfaceLLM._model.generate", side_effect=RuntimeError) +@patch( + "swarms.models.huggingface.HuggingfaceLLM._model.generate", side_effect=RuntimeError +) def test_llm_run_model_exception(mock_generate, llm_instance): with pytest.raises(RuntimeError): llm_instance.run("test task when model fails") diff --git a/tests/models/mpt7b.py b/tests/models/mpt7b.py index cdbd57f6..dfde578d 100644 --- a/tests/models/mpt7b.py +++ b/tests/models/mpt7b.py @@ -1,5 +1,6 @@ import pytest from transformers import AutoModelForCausalLM, AutoTokenizer + from swarms.models.mpt import MPT7B diff --git a/tests/structs/sequential_workflow.py b/tests/structs/sequential_workflow.py new file mode 100644 index 00000000..7bd3e4a4 --- /dev/null +++ b/tests/structs/sequential_workflow.py @@ -0,0 +1,333 @@ +import asyncio +import os +from unittest.mock import patch + +import pytest + +from swarms.models import OpenAIChat +from swarms.structs.flow import Flow +from swarms.structs.sequential_workflow import SequentialWorkflow, Task + +# Mock the OpenAI API key using environment variables +os.environ["OPENAI_API_KEY"] = "mocked_api_key" + + +# Mock OpenAIChat class for testing +class MockOpenAIChat: + def __init__(self, *args, **kwargs): + pass + + def run(self, *args, **kwargs): + return "Mocked result" + + +# Mock Flow class for testing +class MockFlow: + def __init__(self, *args, **kwargs): + pass + + def run(self, *args, **kwargs): + return "Mocked result" + + +# Mock SequentialWorkflow class for testing +class MockSequentialWorkflow: + def __init__(self, *args, **kwargs): + pass + + def add(self, *args, **kwargs): + pass + + def run(self): + pass + + +# Test Task class +def test_task_initialization(): + description = "Sample Task" + flow = MockOpenAIChat() + task = Task(description=description, flow=flow) + assert task.description == description + assert task.flow == flow + + +def test_task_execute(): + description = "Sample Task" + flow = MockOpenAIChat() + task = Task(description=description, flow=flow) + task.execute() + assert task.result == "Mocked result" + + +# Test SequentialWorkflow class +def 
test_sequential_workflow_initialization():
+    workflow = SequentialWorkflow()
+    assert isinstance(workflow, SequentialWorkflow)
+    assert len(workflow.tasks) == 0
+    assert workflow.max_loops == 1
+    assert workflow.autosave is False
+    assert workflow.saved_state_filepath == "sequential_workflow_state.json"
+    assert workflow.restore_state_filepath is None
+    assert workflow.dashboard is False
+
+
+def test_sequential_workflow_add_task():
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = MockOpenAIChat()
+    workflow.add(task_description, task_flow)
+    assert len(workflow.tasks) == 1
+    assert workflow.tasks[0].description == task_description
+    assert workflow.tasks[0].flow == task_flow
+
+
+def test_sequential_workflow_reset_workflow():
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = MockOpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.reset_workflow()
+    assert workflow.tasks[0].result is None
+
+
+def test_sequential_workflow_get_task_results():
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = MockOpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.run()
+    results = workflow.get_task_results()
+    assert len(results) == 1
+    assert task_description in results
+    assert results[task_description] == "Mocked result"
+
+
+def test_sequential_workflow_remove_task():
+    workflow = SequentialWorkflow()
+    task1_description = "Task 1"
+    task2_description = "Task 2"
+    task1_flow = MockOpenAIChat()
+    task2_flow = MockOpenAIChat()
+    workflow.add(task1_description, task1_flow)
+    workflow.add(task2_description, task2_flow)
+    workflow.remove_task(task1_description)
+    assert len(workflow.tasks) == 1
+    assert workflow.tasks[0].description == task2_description
+
+
+def test_sequential_workflow_update_task():
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = MockOpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.update_task(task_description, max_tokens=1000)
+    assert workflow.tasks[0].kwargs["max_tokens"] == 1000
+
+
+def test_sequential_workflow_save_workflow_state():
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = MockOpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.save_workflow_state("test_state.json")
+    assert os.path.exists("test_state.json")
+    os.remove("test_state.json")
+
+
+def test_sequential_workflow_load_workflow_state():
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = MockOpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.save_workflow_state("test_state.json")
+    workflow.load_workflow_state("test_state.json")
+    assert len(workflow.tasks) == 1
+    assert workflow.tasks[0].description == task_description
+    os.remove("test_state.json")
+
+
+def test_sequential_workflow_run():
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = MockOpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.run()
+    assert workflow.tasks[0].result == "Mocked result"
+
+
+def test_sequential_workflow_workflow_bootup(capfd):
+    workflow = SequentialWorkflow()
+    workflow.workflow_bootup()
+    out, _ = capfd.readouterr()
+    assert "Sequential Workflow Initializing..."
in out + + +def test_sequential_workflow_workflow_dashboard(capfd): + workflow = SequentialWorkflow() + workflow.workflow_dashboard() + out, _ = capfd.readouterr() + assert "Sequential Workflow Dashboard" in out + + +# Mock Flow class for async testing +class MockAsyncFlow: + def __init__(self, *args, **kwargs): + pass + + async def arun(self, *args, **kwargs): + return "Mocked result" + + +# Test async execution in SequentialWorkflow +@pytest.mark.asyncio +async def test_sequential_workflow_arun(): + workflow = SequentialWorkflow() + task_description = "Sample Task" + task_flow = MockAsyncFlow() + workflow.add(task_description, task_flow) + await workflow.arun() + assert workflow.tasks[0].result == "Mocked result" + + +def test_real_world_usage_with_openai_key(): + # Initialize the language model + llm = OpenAIChat() + assert isinstance(llm, OpenAIChat) + + +def test_real_world_usage_with_flow_and_openai_key(): + # Initialize a flow with the language model + flow = Flow(llm=OpenAIChat()) + assert isinstance(flow, Flow) + + +def test_real_world_usage_with_sequential_workflow(): + # Initialize a sequential workflow + workflow = SequentialWorkflow() + assert isinstance(workflow, SequentialWorkflow) + + +def test_real_world_usage_add_tasks(): + # Create a sequential workflow and add tasks + workflow = SequentialWorkflow() + task1_description = "Task 1" + task2_description = "Task 2" + task1_flow = OpenAIChat() + task2_flow = OpenAIChat() + workflow.add(task1_description, task1_flow) + workflow.add(task2_description, task2_flow) + assert len(workflow.tasks) == 2 + assert workflow.tasks[0].description == task1_description + assert workflow.tasks[1].description == task2_description + + +def test_real_world_usage_run_workflow(): + # Create a sequential workflow, add a task, and run the workflow + workflow = SequentialWorkflow() + task_description = "Sample Task" + task_flow = OpenAIChat() + workflow.add(task_description, task_flow) + workflow.run() + assert workflow.tasks[0].result is not None + + +def test_real_world_usage_dashboard_display(): + # Create a sequential workflow, add tasks, and display the dashboard + workflow = SequentialWorkflow() + task1_description = "Task 1" + task2_description = "Task 2" + task1_flow = OpenAIChat() + task2_flow = OpenAIChat() + workflow.add(task1_description, task1_flow) + workflow.add(task2_description, task2_flow) + with patch("builtins.print") as mock_print: + workflow.workflow_dashboard() + mock_print.assert_called() + + +def test_real_world_usage_async_execution(): + # Create a sequential workflow, add an async task, and run the workflow asynchronously + workflow = SequentialWorkflow() + task_description = "Sample Task" + async_task_flow = OpenAIChat() + + async def async_run_workflow(): + await workflow.arun() + + workflow.add(task_description, async_task_flow) + asyncio.run(async_run_workflow()) + assert workflow.tasks[0].result is not None + + +def test_real_world_usage_multiple_loops(): + # Create a sequential workflow with multiple loops, add a task, and run the workflow + workflow = SequentialWorkflow(max_loops=3) + task_description = "Sample Task" + task_flow = OpenAIChat() + workflow.add(task_description, task_flow) + workflow.run() + assert workflow.tasks[0].result is not None + + +def test_real_world_usage_autosave_state(): + # Create a sequential workflow with autosave, add a task, run the workflow, and check if state is saved + workflow = SequentialWorkflow(autosave=True) + task_description = "Sample Task" + task_flow = OpenAIChat() + 
workflow.add(task_description, task_flow)
+    workflow.run()
+    assert workflow.tasks[0].result is not None
+    assert os.path.exists("sequential_workflow_state.json")
+    os.remove("sequential_workflow_state.json")
+
+
+def test_real_world_usage_load_state():
+    # Create a sequential workflow, add a task, save state, load state, and run the workflow
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = OpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.run()
+    workflow.save_workflow_state("test_state.json")
+    workflow.load_workflow_state("test_state.json")
+    workflow.run()
+    assert workflow.tasks[0].result is not None
+    os.remove("test_state.json")
+
+
+def test_real_world_usage_update_task_args():
+    # Create a sequential workflow, add a task, and update task arguments
+    workflow = SequentialWorkflow()
+    task_description = "Sample Task"
+    task_flow = OpenAIChat()
+    workflow.add(task_description, task_flow)
+    workflow.update_task(task_description, max_tokens=1000)
+    assert workflow.tasks[0].kwargs["max_tokens"] == 1000
+
+
+def test_real_world_usage_remove_task():
+    # Create a sequential workflow, add tasks, remove a task, and run the workflow
+    workflow = SequentialWorkflow()
+    task1_description = "Task 1"
+    task2_description = "Task 2"
+    task1_flow = OpenAIChat()
+    task2_flow = OpenAIChat()
+    workflow.add(task1_description, task1_flow)
+    workflow.add(task2_description, task2_flow)
+    workflow.remove_task(task1_description)
+    workflow.run()
+    assert len(workflow.tasks) == 1
+    assert workflow.tasks[0].description == task2_description
+
+
+def test_real_world_usage_with_environment_variables():
+    # Ensure that the OpenAI API key is set using environment variables
+    assert "OPENAI_API_KEY" in os.environ
+    assert os.environ["OPENAI_API_KEY"] == "mocked_api_key"
+    del os.environ["OPENAI_API_KEY"]  # Clean up after the test
+
+
+def test_real_world_usage_no_openai_key():
+    # Ensure that an exception is raised when the OpenAI API key is not set
+    with pytest.raises(ValueError):
+        OpenAIChat()  # API key not provided, should raise an exception
diff --git a/workflow.py b/workflow.py
deleted file mode 100644
index bc757108..00000000
--- a/workflow.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from swarms.models import OpenAIChat
-from swarms.structs import Workflow
-
-
-llm = OpenAIChat(openai_api_key="")
-
-workflow = Workflow(llm)
-
-workflow.add("What's the weather in miami")
-
-workflow.run()