Merge branch 'master' of https://github.com/kyegomez/swarms
# Conflicts: # swarms/memory/chroma.pypull/307/head
@ -1,2 +1,2 @@
|
||||
[flake8]
|
||||
extend-ignore = E501, W292, W291
|
||||
extend-ignore = E501, W292, W291, W293
|
||||
|
@ -1,13 +1,14 @@
|
||||
---
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: [kyegomez]
|
||||
patreon: # Replace with a single Patreon username
|
||||
open_collective: # Replace with a single Open Collective username
|
||||
ko_fi: # Replace with a single Ko-fi username
|
||||
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
||||
liberapay: # Replace with a single Liberapay username
|
||||
issuehunt: # Replace with a single IssueHunt username
|
||||
otechie: # Replace with a single Otechie username
|
||||
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
|
||||
custom: #Nothing
|
||||
# patreon: # Replace with a single Patreon username
|
||||
# open_collective: # Replace with a single Open Collective username
|
||||
# ko_fi: # Replace with a single Ko-fi username
|
||||
# tidelift: # Replace with a single Tidelift platform-name/package-name
|
||||
# community_bridge: # Replace with a single Community Bridge project-name
|
||||
# liberapay: # Replace with a single Liberapay username
|
||||
# issuehunt: # Replace with a single IssueHunt username
|
||||
# otechie: # Replace with a single Otechie username
|
||||
# lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name
|
||||
# custom: #Nothing
|
||||
|
@ -1,12 +1,14 @@
|
||||
---
|
||||
# this is a config file for the github action labeler
|
||||
|
||||
# Add 'label1' to any changes within 'example' folder or any subfolders
|
||||
example_change:
|
||||
- example/**
|
||||
- example/**
|
||||
|
||||
# Add 'label2' to any file changes within 'example2' folder
|
||||
example2_change: example2/*
|
||||
|
||||
# Add label3 to any change to .txt files within the entire repository. Quotation marks are required for the leading asterisk
|
||||
# Add label3 to any change to .txt files within the entire repository.
|
||||
# Quotation marks are required for the leading asterisk
|
||||
text_files:
|
||||
- '**/*.txt'
|
||||
- '**/*.txt'
|
||||
|
@ -1,30 +0,0 @@
|
||||
name: Linting and Formatting
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
lint_and_format:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.x
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: Find Python files
|
||||
run: find swarms -name "*.py" -type f -exec autopep8 --in-place --aggressive --aggressive {} +
|
||||
|
||||
- name: Push changes
|
||||
uses: ad-m/github-push-action@master
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
@ -1,42 +0,0 @@
|
||||
name: Continuous Integration
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.x
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: Run unit tests
|
||||
run: pytest tests/unit
|
||||
|
||||
- name: Run integration tests
|
||||
run: pytest tests/integration
|
||||
|
||||
- name: Run code coverage
|
||||
run: pytest --cov=swarms tests/
|
||||
|
||||
- name: Run linters
|
||||
run: pylint swarms
|
||||
|
||||
- name: Build documentation
|
||||
run: make docs
|
||||
|
||||
- name: Validate documentation
|
||||
run: sphinx-build -b linkcheck docs build/docs
|
||||
|
||||
- name: Run performance tests
|
||||
run: find ./tests -name '*.py' -exec pytest {} \;
|
@ -1,18 +1,19 @@
|
||||
---
|
||||
name: Docker Image CI
|
||||
|
||||
on:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
branches: ["master"]
|
||||
pull_request:
|
||||
branches: [ "master" ]
|
||||
branches: ["master"]
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
name: Build Docker image
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build the Docker image
|
||||
run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build the Docker image
|
||||
run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
|
||||
|
@ -1,28 +0,0 @@
|
||||
name: Documentation Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.x
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: Build documentation
|
||||
run: make docs
|
||||
|
||||
- name: Validate documentation
|
||||
run: sphinx-build -b linkcheck docs build/docs
|
@ -1,19 +1,29 @@
|
||||
---
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Lint
|
||||
|
||||
on: [push, pull_request]
|
||||
on: [push, pull_request] # yamllint disable-line rule:truthy
|
||||
|
||||
jobs:
|
||||
yaml-lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out source repository
|
||||
uses: actions/checkout@v4
|
||||
- name: yaml Lint
|
||||
uses: ibiqlik/action-yamllint@v3
|
||||
flake8-lint:
|
||||
runs-on: ubuntu-latest
|
||||
name: Lint
|
||||
name: flake8 Lint
|
||||
steps:
|
||||
- name: Check out source repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python environment
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: flake8 Lint
|
||||
uses: py-actions/flake8@v2
|
||||
uses: py-actions/flake8@v2
|
||||
ruff-lint:
|
||||
runs-on: ubuntu-latest
|
||||
name: ruff Lint
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: chartboost/ruff-action@v1
|
||||
|
@ -1,25 +0,0 @@
|
||||
name: Linting
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.x
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: Run linters
|
||||
run: pylint swarms
|
@ -1,23 +0,0 @@
|
||||
name: Pylint
|
||||
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8", "3.9", "3.10"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pylint
|
||||
- name: Analysing the code with pylint
|
||||
run: |
|
||||
pylint $(git ls-files '*.py')
|
@ -1,39 +0,0 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a single version of Python
|
||||
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
||||
|
||||
name: Python application
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
pull_request:
|
||||
branches: [ "master" ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python 3.10
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install flake8 pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
pytest
|
@ -1,23 +0,0 @@
|
||||
name: Quality
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
pull_request:
|
||||
branches: [ "master" ]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout actions
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Init environment
|
||||
uses: ./.github/actions/init-environment
|
||||
- name: Run linter
|
||||
run: |
|
||||
pylint `git diff --name-only --diff-filter=d origin/master HEAD | grep -E '\.py$' | tr '\n' ' '`
|
@ -1,8 +0,0 @@
|
||||
name: Ruff
|
||||
on: [ push, pull_request ]
|
||||
jobs:
|
||||
ruff:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: chartboost/ruff-action@v1
|
@ -1,23 +0,0 @@
|
||||
name: Python application test
|
||||
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.8
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Run tests with pytest
|
||||
run: |
|
||||
find tests/ -name "*.py" | xargs pytest
|
@ -1,21 +0,0 @@
|
||||
Developers
|
||||
|
||||
Install pre-commit (https://pre-commit.com/)
|
||||
|
||||
```bash
|
||||
pip install pre-commit
|
||||
```
|
||||
|
||||
Check that it's installed
|
||||
|
||||
```bash
|
||||
pre-commit --version
|
||||
```
|
||||
|
||||
This repository already has a pre-commit configuration. To install the hooks, run:
|
||||
|
||||
```bash
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
Now when you make a git commit, the black code formatter and ruff linter will run.
|
@ -0,0 +1,81 @@
|
||||
# Qdrant Client Library
|
||||
|
||||
## Overview
|
||||
|
||||
The Qdrant Client Library is designed for interacting with the Qdrant vector database, allowing efficient storage and retrieval of high-dimensional vector data. It integrates with machine learning models for embedding and is particularly suited for search and recommendation systems.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install qdrant-client sentence-transformers httpx
|
||||
```
|
||||
|
||||
## Class Definition: Qdrant
|
||||
|
||||
```python
|
||||
class Qdrant:
|
||||
def __init__(self, api_key: str, host: str, port: int = 6333, collection_name: str = "qdrant", model_name: str = "BAAI/bge-small-en-v1.5", https: bool = True):
|
||||
...
|
||||
```
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
| Parameter | Type | Description | Default Value |
|
||||
|-----------------|---------|--------------------------------------------------|-----------------------|
|
||||
| api_key | str | API key for authentication. | - |
|
||||
| host | str | Host address of the Qdrant server. | - |
|
||||
| port | int | Port number for the Qdrant server. | 6333 |
|
||||
| collection_name | str | Name of the collection to be used or created. | "qdrant" |
|
||||
| model_name | str | Name of the sentence transformer model. | "BAAI/bge-small-en-v1.5" |
|
||||
| https | bool | Flag to use HTTPS for connection. | True |
|
||||
|
||||
### Methods
|
||||
|
||||
#### `_load_embedding_model(model_name: str)`
|
||||
|
||||
Loads the sentence embedding model.
|
||||
|
||||
#### `_setup_collection()`
|
||||
|
||||
Checks if the specified collection exists in Qdrant; if not, creates it.
|
||||
|
||||
#### `add_vectors(docs: List[dict]) -> OperationResponse`
|
||||
|
||||
Adds vectors to the Qdrant collection.
|
||||
|
||||
#### `search_vectors(query: str, limit: int = 3) -> SearchResult`
|
||||
|
||||
Searches the Qdrant collection for vectors similar to the query vector.
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Example 1: Setting Up the Qdrant Client
|
||||
|
||||
```python
|
||||
from qdrant_client import Qdrant
|
||||
|
||||
qdrant_client = Qdrant(api_key="your_api_key", host="localhost", port=6333)
|
||||
```
|
||||
|
||||
### Example 2: Adding Vectors to a Collection
|
||||
|
||||
```python
|
||||
documents = [
|
||||
{"page_content": "Sample text 1"},
|
||||
{"page_content": "Sample text 2"}
|
||||
]
|
||||
|
||||
operation_info = qdrant_client.add_vectors(documents)
|
||||
print(operation_info)
|
||||
```
|
||||
|
||||
### Example 3: Searching for Vectors
|
||||
|
||||
```python
|
||||
search_result = qdrant_client.search_vectors("Sample search query")
|
||||
print(search_result)
|
||||
```
|
||||
|
||||
## Further Information
|
||||
|
||||
Refer to the [Qdrant Documentation](https://qdrant.tech/docs) for more details on the Qdrant vector database.
|
@ -0,0 +1,227 @@
|
||||
# Language Model Interface Documentation
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Abstract Language Model](#abstract-language-model)
|
||||
- [Initialization](#initialization)
|
||||
- [Attributes](#attributes)
|
||||
- [Methods](#methods)
|
||||
3. [Implementation](#implementation)
|
||||
4. [Usage Examples](#usage-examples)
|
||||
5. [Additional Features](#additional-features)
|
||||
6. [Performance Metrics](#performance-metrics)
|
||||
7. [Logging and Checkpoints](#logging-and-checkpoints)
|
||||
8. [Resource Utilization Tracking](#resource-utilization-tracking)
|
||||
9. [Conclusion](#conclusion)
|
||||
|
||||
---
|
||||
|
||||
## 1. Introduction <a name="introduction"></a>
|
||||
|
||||
The Language Model Interface (`AbstractLLM`) is a flexible and extensible framework for working with various language models. This documentation provides a comprehensive guide to the interface, its attributes, methods, and usage examples. Whether you're using a pre-trained language model or building your own, this interface can help streamline the process of text generation, chatbots, summarization, and more.
|
||||
|
||||
## 2. Abstract Language Model <a name="abstract-language-model"></a>
|
||||
|
||||
### Initialization <a name="initialization"></a>
|
||||
|
||||
The `AbstractLLM` class provides a common interface for language models. It can be initialized with various parameters to customize model behavior. Here are the initialization parameters:
|
||||
|
||||
| Parameter | Description | Default Value |
|
||||
|------------------------|-------------------------------------------------------------------------------------------------|---------------|
|
||||
| `model_name` | The name of the language model to use. | None |
|
||||
| `max_tokens` | The maximum number of tokens in the generated text. | None |
|
||||
| `temperature` | The temperature parameter for controlling randomness in text generation. | None |
|
||||
| `top_k` | The top-k parameter for filtering words in text generation. | None |
|
||||
| `top_p` | The top-p parameter for filtering words in text generation. | None |
|
||||
| `system_prompt` | A system-level prompt to set context for generation. | None |
|
||||
| `beam_width` | The beam width for beam search. | None |
|
||||
| `num_return_sequences` | The number of sequences to return in the output. | None |
|
||||
| `seed` | The random seed for reproducibility. | None |
|
||||
| `frequency_penalty` | The frequency penalty parameter for promoting word diversity. | None |
|
||||
| `presence_penalty` | The presence penalty parameter for discouraging repetitions. | None |
|
||||
| `stop_token` | A stop token to indicate the end of generated text. | None |
|
||||
| `length_penalty` | The length penalty parameter for controlling the output length. | None |
|
||||
| `role` | The role of the language model (e.g., assistant, user, etc.). | None |
|
||||
| `max_length` | The maximum length of generated sequences. | None |
|
||||
| `do_sample` | Whether to use sampling during text generation. | None |
|
||||
| `early_stopping` | Whether to use early stopping during text generation. | None |
|
||||
| `num_beams` | The number of beams to use in beam search. | None |
|
||||
| `repition_penalty` | The repetition penalty parameter for discouraging repeated tokens. | None |
|
||||
| `pad_token_id` | The token ID for padding. | None |
|
||||
| `eos_token_id` | The token ID for the end of a sequence. | None |
|
||||
| `bos_token_id` | The token ID for the beginning of a sequence. | None |
|
||||
| `device` | The device to run the model on (e.g., 'cpu' or 'cuda'). | None |
|
||||
|
||||
### Attributes <a name="attributes"></a>
|
||||
|
||||
- `model_name`: The name of the language model being used.
|
||||
- `max_tokens`: The maximum number of tokens in generated text.
|
||||
- `temperature`: The temperature parameter controlling randomness.
|
||||
- `top_k`: The top-k parameter for word filtering.
|
||||
- `top_p`: The top-p parameter for word filtering.
|
||||
- `system_prompt`: A system-level prompt for context.
|
||||
- `beam_width`: The beam width for beam search.
|
||||
- `num_return_sequences`: The number of output sequences.
|
||||
- `seed`: The random seed for reproducibility.
|
||||
- `frequency_penalty`: The frequency penalty parameter.
|
||||
- `presence_penalty`: The presence penalty parameter.
|
||||
- `stop_token`: The stop token to indicate text end.
|
||||
- `length_penalty`: The length penalty parameter.
|
||||
- `role`: The role of the language model.
|
||||
- `max_length`: The maximum length of generated sequences.
|
||||
- `do_sample`: Whether to use sampling during generation.
|
||||
- `early_stopping`: Whether to use early stopping.
|
||||
- `num_beams`: The number of beams in beam search.
|
||||
- `repition_penalty`: The repetition penalty parameter.
|
||||
- `pad_token_id`: The token ID for padding.
|
||||
- `eos_token_id`: The token ID for the end of a sequence.
|
||||
- `bos_token_id`: The token ID for the beginning of a sequence.
|
||||
- `device`: The device used for model execution.
|
||||
- `history`: A list of conversation history.
|
||||
|
||||
### Methods <a name="methods"></a>
|
||||
|
||||
The `AbstractLLM` class defines several methods for working with language models:
|
||||
|
||||
- `run(task: Optional[str] = None, *args, **kwargs) -> str`: Generate text using the language model. This method is abstract and must be implemented by subclasses.
|
||||
|
||||
- `arun(task: Optional[str] = None, *args, **kwargs)`: An asynchronous version of `run` for concurrent text generation.
|
||||
|
||||
- `batch_run(tasks: List[str], *args, **kwargs)`: Generate text for a batch of tasks.
|
||||
|
||||
- `abatch_run(tasks: List[str], *args, **kwargs)`: An asynchronous version of `batch_run` for concurrent batch generation.
|
||||
|
||||
- `chat(task: str, history: str = "") -> str`: Conduct a chat with the model, providing a conversation history.
|
||||
|
||||
- `__call__(task: str) -> str`: Call the model to generate text.
|
||||
|
||||
- `_tokens_per_second() -> float`: Calculate tokens generated per second.
|
||||
|
||||
- `_num_tokens(text: str) -> int`: Calculate the number of tokens in a text.
|
||||
|
||||
- `_time_for_generation(task: str) -> float`: Measure the time taken for text generation.
|
||||
|
||||
- `generate_summary(text: str) -> str`: Generate a summary of the provided text.
|
||||
|
||||
- `set_temperature(value: float)`: Set the temperature parameter.
|
||||
|
||||
- `set_max_tokens(value: int)`: Set the maximum number of tokens.
|
||||
|
||||
- `clear_history()`: Clear the conversation history.
|
||||
|
||||
- `enable_logging(log_file: str = "model.log")`: Initialize logging for the model.
|
||||
|
||||
- `log_event(message: str)`: Log an event.
|
||||
|
||||
- `save_checkpoint(checkpoint_dir: str = "checkpoints")`: Save the model state as a checkpoint.
|
||||
|
||||
- `load_checkpoint(checkpoint_path: str)`: Load the model state from a checkpoint.
|
||||
|
||||
- `toggle_creative_mode(enable: bool)`: Toggle creative mode for the model.
|
||||
|
||||
- `track_resource_utilization()`: Track and report resource utilization.
|
||||
|
||||
- `get_generation_time() -> float`: Get the time taken for text generation.
|
||||
|
||||
- `set_max_length(max_length: int)`: Set the maximum length of generated sequences.
|
||||
|
||||
- `set_model_name(model_name: str)`: Set the model name.
|
||||
|
||||
- `set_frequency_penalty(frequency_penalty: float)`: Set the frequency penalty parameter.
|
||||
|
||||
- `set_presence_penalty(presence_penalty: float)`: Set the presence penalty parameter.
|
||||
|
||||
- `set_stop_token(stop_token: str)`: Set the stop token.
|
||||
|
||||
- `set_length_penalty(length_penalty: float)`: Set the length penalty parameter.
|
||||
|
||||
- `set_role(role: str)`: Set the role of the model.
|
||||
|
||||
- `set_top_k(top_k: int)`: Set the top-k parameter.
|
||||
|
||||
- `set_top_p(top_p: float)`: Set the top-p parameter.
|
||||
|
||||
- `set_num_beams(num_beams: int)`: Set the number of beams.
|
||||
|
||||
- `set_do_sample(do_sample: bool)`: Set whether to use sampling.
|
||||
|
||||
- `set_early_stopping(early_stopping: bool)`: Set whether to use early stopping.
|
||||
|
||||
- `set_seed(seed: int)`: Set the random seed.
|
||||
|
||||
- `set_device(device: str)`: Set the device for model execution.
|
||||
|
||||
## 3. Implementation <a name="implementation"></a>
|
||||
|
||||
The `AbstractLLM` class serves as the base for implementing specific language models. Subclasses of `AbstractLLM` should implement the `run` method to define how text is generated for a given task. This design allows flexibility in integrating different language models while maintaining a common interface.
|
||||
|
||||
## 4. Usage Examples <a name="usage-examples"></a>
|
||||
|
||||
To demonstrate how to use the `AbstractLLM` interface, let's create an example using a hypothetical language model. We'll initialize an instance of the model and generate text for a simple task.
|
||||
|
||||
```python
|
||||
# Import the AbstractLLM class
|
||||
from swarms.models import AbstractLLM
|
||||
|
||||
# Create an instance of the language model
|
||||
language_model = AbstractLLM(
|
||||
model_name="my_language_model",
|
||||
max_tokens=50,
|
||||
temperature=0.7,
|
||||
top_k=50,
|
||||
top_p=0.9,
|
||||
device="cuda",
|
||||
)
|
||||
|
||||
# Generate text for a task
|
||||
task = "Translate the following English text to French: 'Hello, world.'"
|
||||
generated_text = language_model.run(task)
|
||||
|
||||
# Print the generated text
|
||||
print(generated_text)
|
||||
```
|
||||
|
||||
In this example, we've created an instance of our hypothetical language model, configured its parameters, and used the `run` method to generate text for a translation task.
|
||||
|
||||
## 5. Additional Features <a name="additional-features"></a>
|
||||
|
||||
The `AbstractLLM` interface provides additional features for customization and control:
|
||||
|
||||
- `batch_run`: Generate text for a batch of tasks efficiently.
|
||||
- `arun` and `abatch_run`: Asynchronous versions of `run` and `batch_run` for concurrent text generation.
|
||||
- `chat`: Conduct a conversation with the model by providing a history of the conversation.
|
||||
- `__call__`: Allow the model to be called directly to generate text.
|
||||
|
||||
These features enhance the flexibility and utility of the interface in various applications, including chatbots, language translation, and content generation.
|
||||
|
||||
## 6. Performance Metrics <a name="performance-metrics"></a>
|
||||
|
||||
The `AbstractLLM` class offers methods for tracking performance metrics:
|
||||
|
||||
- `_tokens_per_second`: Calculate tokens generated per second.
|
||||
- `_num_tokens`: Calculate the number of tokens in a text.
|
||||
- `_time_for_generation`: Measure the time taken for text generation.
|
||||
|
||||
These metrics help assess the efficiency and speed of text generation, enabling optimizations as needed.
|
||||
|
||||
## 7. Logging and Checkpoints <a name="logging-and-checkpoints"></a>
|
||||
|
||||
Logging and checkpointing are crucial for tracking model behavior and ensuring reproducibility:
|
||||
|
||||
- `enable_logging`: Initialize logging for the model.
|
||||
- `log_event`: Log events and activities.
|
||||
- `save_checkpoint`: Save the model state as a checkpoint.
|
||||
- `load_checkpoint`: Load the model state from a checkpoint.
|
||||
|
||||
These capabilities aid in debugging, monitoring, and resuming model experiments.
|
||||
|
||||
## 8. Resource Utilization Tracking <a name="resource-utilization-tracking"></a>
|
||||
|
||||
The `track_resource_utilization` method is a placeholder for tracking and reporting resource utilization, such as CPU and memory usage. It can be customized to suit specific monitoring needs.
|
||||
|
||||
## 9. Conclusion <a name="conclusion"></a>
|
||||
|
||||
The Language Model Interface (`AbstractLLM`) is a versatile framework for working with language models. Whether you're using pre-trained models or developing your own, this interface provides a consistent and extensible foundation. By following the provided guidelines and examples, you can integrate and customize language models for various natural language processing tasks.
|
@ -0,0 +1,293 @@
|
||||
# `BaseMultiModalModel` Documentation
|
||||
|
||||
Swarms is a Python library that provides a framework for running multimodal AI models. It allows you to combine text and image inputs and generate coherent and context-aware responses. This library is designed to be extensible, allowing you to integrate various multimodal models.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Installation](#installation)
|
||||
3. [Getting Started](#getting-started)
|
||||
4. [BaseMultiModalModel Class](#basemultimodalmodel-class)
|
||||
- [Initialization](#initialization)
|
||||
- [Methods](#methods)
|
||||
5. [Usage Examples](#usage-examples)
|
||||
6. [Additional Tips](#additional-tips)
|
||||
7. [References and Resources](#references-and-resources)
|
||||
|
||||
## 1. Introduction <a name="introduction"></a>
|
||||
|
||||
Swarms is designed to simplify the process of working with multimodal AI models. These models are capable of understanding and generating content based on both textual and image inputs. With this library, you can run such models and receive context-aware responses.
|
||||
|
||||
## 2. Installation <a name="installation"></a>
|
||||
|
||||
To install swarms, you can use pip:
|
||||
|
||||
```bash
|
||||
pip install swarms
|
||||
```
|
||||
|
||||
## 3. Getting Started <a name="getting-started"></a>
|
||||
|
||||
To get started with Swarms, you'll need to import the library and create an instance of the `BaseMultiModalModel` class. This class serves as the foundation for running multimodal models.
|
||||
|
||||
```python
|
||||
from swarms.models import BaseMultiModalModel
|
||||
|
||||
model = BaseMultiModalModel(
|
||||
model_name="your_model_name",
|
||||
temperature=0.5,
|
||||
max_tokens=500,
|
||||
max_workers=10,
|
||||
top_p=1,
|
||||
top_k=50,
|
||||
beautify=False,
|
||||
device="cuda",
|
||||
max_new_tokens=500,
|
||||
retries=3,
|
||||
)
|
||||
```
|
||||
|
||||
You can customize the initialization parameters based on your model's requirements.
|
||||
|
||||
## 4. BaseMultiModalModel Class <a name="basemultimodalmodel-class"></a>
|
||||
|
||||
### Initialization <a name="initialization"></a>
|
||||
|
||||
The `BaseMultiModalModel` class is initialized with several parameters that control its behavior. Here's a breakdown of the initialization parameters:
|
||||
|
||||
| Parameter | Description | Default Value |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------|---------------|
|
||||
| `model_name` | The name of the multimodal model to use. | None |
|
||||
| `temperature` | The temperature parameter for controlling randomness in text generation. | 0.5 |
|
||||
| `max_tokens` | The maximum number of tokens in the generated text. | 500 |
|
||||
| `max_workers` | The maximum number of concurrent workers for running tasks. | 10 |
|
||||
| `top_p` | The top-p parameter for filtering words in text generation. | 1 |
|
||||
| `top_k` | The top-k parameter for filtering words in text generation. | 50 |
|
||||
| `beautify` | Whether to beautify the output text. | False |
|
||||
| `device` | The device to run the model on (e.g., 'cuda' or 'cpu'). | 'cuda' |
|
||||
| `max_new_tokens` | The maximum number of new tokens allowed in generated responses. | 500 |
|
||||
| `retries` | The number of retries in case of an error during text generation. | 3 |
|
||||
| `system_prompt` | A system-level prompt to set context for generation. | None |
|
||||
| `meta_prompt` | A meta prompt to provide guidance for including image labels in responses. | None |
|
||||
|
||||
### Methods <a name="methods"></a>
|
||||
|
||||
The `BaseMultiModalModel` class defines various methods for running multimodal models and managing interactions:
|
||||
|
||||
- `run(task: str, img: str) -> str`: Run the multimodal model with a text task and an image URL to generate a response.
|
||||
|
||||
- `arun(task: str, img: str) -> str`: Run the multimodal model asynchronously with a text task and an image URL to generate a response.
|
||||
|
||||
- `get_img_from_web(img: str) -> Image`: Fetch an image from a URL and return it as a PIL Image.
|
||||
|
||||
- `encode_img(img: str) -> str`: Encode an image to base64 format.
|
||||
|
||||
- `get_img(img: str) -> Image`: Load an image from the local file system and return it as a PIL Image.
|
||||
|
||||
- `clear_chat_history()`: Clear the chat history maintained by the model.
|
||||
|
||||
- `run_many(tasks: List[str], imgs: List[str]) -> List[str]`: Run the model on multiple text tasks and image URLs concurrently and return a list of responses.
|
||||
|
||||
- `run_batch(tasks_images: List[Tuple[str, str]]) -> List[str]`: Process a batch of text tasks and image URLs and return a list of responses.
|
||||
|
||||
- `run_batch_async(tasks_images: List[Tuple[str, str]]) -> List[str]`: Process a batch of text tasks and image URLs asynchronously and return a list of responses.
|
||||
|
||||
- `run_batch_async_with_retries(tasks_images: List[Tuple[str, str]]) -> List[str]`: Process a batch of text tasks and image URLs asynchronously with retries in case of errors and return a list of responses.
|
||||
|
||||
- `unique_chat_history() -> List[str]`: Get the unique chat history stored by the model.
|
||||
|
||||
- `run_with_retries(task: str, img: str) -> str`: Run the model with retries in case of an error.
|
||||
|
||||
- `run_batch_with_retries(tasks_images: List[Tuple[str, str]]) -> List[str]`: Run a batch of tasks with retries in case of errors and return a list of responses.
|
||||
|
||||
- `_tokens_per_second() -> float`: Calculate the tokens generated per second during text generation.
|
||||
|
||||
- `_time_for_generation(task: str) -> float`: Measure the time taken for text generation for a specific task.
|
||||
|
||||
- `generate_summary(text: str) -> str`: Generate a summary of the provided text.
|
||||
|
||||
- `set_temperature(value: float)`: Set the temperature parameter for controlling randomness in text generation.
|
||||
|
||||
- `set_max_tokens(value: int)`: Set the maximum number of tokens allowed in generated responses.
|
||||
|
||||
- `get_generation_time() -> float`: Get the time taken for text generation for the last task.
|
||||
|
||||
- `get_chat_history() -> List[str]`: Get the chat history, including all interactions.
|
||||
|
||||
- `get_unique_chat_history() -> List[str]`: Get the unique chat history, removing duplicate interactions.
|
||||
|
||||
- `get_chat_history_length() -> int`: Get the length of the chat history.
|
||||
|
||||
- `get_unique_chat_history_length() -> int`: Get the length of the unique chat history.
|
||||
|
||||
- `get_chat_history_tokens() -> int`: Get the total number of tokens in the chat history.
|
||||
|
||||
- `print_beautiful(content: str, color: str = 'cyan')`: Print content beautifully using colored text.
|
||||
|
||||
- `stream(content: str)`: Stream the content, printing it character by character.
|
||||
|
||||
- `meta_prompt() -> str`: Get the meta prompt that provides guidance for including image labels in responses.
|
||||
|
||||
## 5. Usage Examples <a name="usage-examples"></a>
|
||||
|
||||
Let's explore some usage examples of the MultiModalAI library:
|
||||
|
||||
### Example 1: Running the Model
|
||||
|
||||
```python
|
||||
# Import the library
|
||||
from swarms.models import BaseMultiModalModel
|
||||
|
||||
# Create an instance of the model
|
||||
model = BaseMultiModalModel(
|
||||
model_name="your_model_name",
|
||||
temperature=0.5,
|
||||
max_tokens=500,
|
||||
device="cuda",
|
||||
)
|
||||
|
||||
# Run the model with a text task and an image URL
|
||||
response = model.run("Generate a summary of this text", "https://www.example.com/image.jpg")
|
||||
print(response)
|
||||
```
|
||||
|
||||
### Example 2: Running Multiple Tasks Concurrently
|
||||
|
||||
```python
|
||||
# Import the library
|
||||
from swarms.models import BaseMultiModalModel
|
||||
|
||||
# Create an instance of the model
|
||||
model = BaseMultiModalModel(
|
||||
model_name="your_model_name",
|
||||
temperature=0.5,
|
||||
max_tokens=500,
|
||||
max_workers=4,
|
||||
device="cuda",
|
||||
)
|
||||
|
||||
# Define a list of tasks and image URLs
|
||||
tasks = ["Task 1", "Task 2", "Task 3"]
|
||||
images = ["https://image1.jpg", "https://image2.jpg", "https://image3.jpg"]
|
||||
|
||||
# Run the model on multiple tasks concurrently
|
||||
responses = model.run_many(tasks, images)
|
||||
for response in responses:
|
||||
print(response)
|
||||
```
|
||||
|
||||
### Example 3: Running the Model Asynchronously
|
||||
|
||||
```python
|
||||
# Import the library
|
||||
from swarms.models import BaseMultiModalModel
|
||||
|
||||
# Create an instance of the model
|
||||
model = BaseMultiModalModel(
|
||||
model_name="your_model_name",
|
||||
temperature=0.5,
|
||||
max_tokens=500,
|
||||
device="cuda",
|
||||
)
|
||||
|
||||
# Define a list of tasks and image URLs
|
||||
tasks_images = [
|
||||
("Task 1", "https://image1.jpg"),
|
||||
("Task 2", "https://image2.jpg"),
|
||||
("Task 3", "https://image3.jpg"),
|
||||
]
|
||||
|
||||
# Run the model on multiple tasks asynchronously
|
||||
responses = model.run_batch_async(tasks_images)
|
||||
for response in responses:
|
||||
print(response)
|
||||
```
|
||||
|
||||
### Example 4: Inheriting `BaseMultiModalModel` for it's prebuilt classes
|
||||
```python
|
||||
from swarms.models import BaseMultiModalModel
|
||||
|
||||
class CustomMultiModalModel(BaseMultiModalModel):
|
||||
def __init__(self, model_name, custom_parameter, *args, **kwargs):
|
||||
# Call the parent class constructor
|
||||
super().__init__(model_name=model_name, *args, **kwargs)
|
||||
# Initialize custom parameters specific to your model
|
||||
self.custom_parameter = custom_parameter
|
||||
|
||||
def __call__(self, text, img):
|
||||
# Implement the multimodal model logic here
|
||||
# You can use self.custom_parameter and other inherited attributes
|
||||
pass
|
||||
|
||||
def generate_summary(self, text):
|
||||
# Implement the summary generation logic using your model
|
||||
# You can use self.custom_parameter and other inherited attributes
|
||||
pass
|
||||
|
||||
# Create an instance of your custom multimodal model
|
||||
custom_model = CustomMultiModalModel(
|
||||
model_name="your_custom_model_name",
|
||||
custom_parameter="your_custom_value",
|
||||
temperature=0.5,
|
||||
max_tokens=500,
|
||||
device="cuda",
|
||||
)
|
||||
|
||||
# Run your custom model
|
||||
response = custom_model.run("Generate a summary of this text", "https://www.example.com/image.jpg")
|
||||
print(response)
|
||||
|
||||
# Generate a summary using your custom model
|
||||
summary = custom_model.generate_summary("This is a sample text to summarize.")
|
||||
print(summary)
|
||||
```
|
||||
|
||||
In the code above:
|
||||
|
||||
1. We define a `CustomMultiModalModel` class that inherits from `BaseMultiModalModel`.
|
||||
|
||||
2. In the constructor of our custom class, we call the parent class constructor using `super()` and initialize any custom parameters specific to our model. In this example, we introduced a `custom_parameter`.
|
||||
|
||||
3. We override the `__call__` method, which is responsible for running the multimodal model logic. Here, you can implement the specific behavior of your model, considering both text and image inputs.
|
||||
|
||||
4. We override the `generate_summary` method, which is used to generate a summary of text input. You can implement your custom summarization logic here.
|
||||
|
||||
5. We create an instance of our custom model, passing the required parameters, including the custom parameter.
|
||||
|
||||
6. We demonstrate how to run the custom model and generate a summary using it.
|
||||
|
||||
By inheriting from `BaseMultiModalModel`, you can leverage the prebuilt features and methods provided by the library while customizing the behavior of your multimodal model. This allows you to create powerful and specialized models for various multimodal tasks.
|
||||
|
||||
These examples demonstrate how to use MultiModalAI to run multimodal models with text and image inputs. You can adjust the parameters and methods to suit your specific use cases.
|
||||
|
||||
## 6. Additional Tips <a name="additional-tips"></a>
|
||||
|
||||
Here are some additional tips and considerations for using MultiModalAI effectively:
|
||||
|
||||
- **Custom Models**: You can create your own multimodal models and inherit from the `BaseMultiModalModel` class to integrate them with this library.
|
||||
|
||||
- **Retries**: In cases where text generation might fail due to various reasons (e.g., server issues), using methods with retries can be helpful.
|
||||
|
||||
- **Monitoring**: You can monitor the performance of your model using methods like `_tokens_per_second()` and `_time_for_generation()`.
|
||||
|
||||
- **Chat History**: The library maintains a chat history, allowing you to keep track of interactions.
|
||||
|
||||
- **Streaming**: The `stream()` method can be useful for displaying output character by character, which can be helpful for certain applications.
|
||||
|
||||
## 7. References and Resources <a name="references-and-resources"></a>
|
||||
|
||||
Here are some references and resources that you may find useful for working with multimodal models:
|
||||
|
||||
- [Hugging Face Transformers Library](https://huggingface.co/transformers/): A library for working with various transformer-based models.
|
||||
|
||||
- [PIL (Python Imaging Library)](https://pillow.readthedocs.io/en/stable/): Documentation for working with images in Python using the Pillow library.
|
||||
|
||||
- [Concurrent Programming in Python](https://docs.python.org/3/library/concurrent.futures.html): Official Python documentation for concurrent programming.
|
||||
|
||||
- [Requests Library Documentation](https://docs.python-requests.org/en/latest/): Documentation for the Requests library, which is used for making HTTP requests.
|
||||
|
||||
- [Base64 Encoding in Python](https://docs.python.org/3/library/base64.html): Official Python documentation for base64 encoding and decoding.
|
||||
|
||||
This concludes the documentation for the MultiModalAI library. You can now explore the library further and integrate it with your multimodal AI projects.
|
@ -1,251 +1,201 @@
|
||||
# `GPT4Vision` Documentation
|
||||
# `GPT4VisionAPI` Documentation
|
||||
|
||||
## Table of Contents
|
||||
- [Overview](#overview)
|
||||
**Table of Contents**
|
||||
- [Introduction](#introduction)
|
||||
- [Installation](#installation)
|
||||
- [Initialization](#initialization)
|
||||
- [Methods](#methods)
|
||||
- [process_img](#process_img)
|
||||
- [__call__](#__call__)
|
||||
- [run](#run)
|
||||
- [arun](#arun)
|
||||
- [Configuration Options](#configuration-options)
|
||||
- [Usage Examples](#usage-examples)
|
||||
- [Additional Tips](#additional-tips)
|
||||
- [References and Resources](#references-and-resources)
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The GPT4Vision Model API is designed to provide an easy-to-use interface for interacting with the OpenAI GPT-4 Vision model. This model can generate textual descriptions for images and answer questions related to visual content. Whether you want to describe images or perform other vision-related tasks, GPT4Vision makes it simple and efficient.
|
||||
|
||||
The library offers a straightforward way to send images and tasks to the GPT-4 Vision model and retrieve the generated responses. It handles API communication, authentication, and retries, making it a powerful tool for developers working with computer vision and natural language processing tasks.
|
||||
- [Module Overview](#module-overview)
|
||||
- [Class: GPT4VisionAPI](#class-gpt4visionapi)
|
||||
- [Initialization](#initialization)
|
||||
- [Methods](#methods)
|
||||
- [encode_image](#encode_image)
|
||||
- [run](#run)
|
||||
- [__call__](#__call__)
|
||||
- [Examples](#examples)
|
||||
- [Example 1: Basic Usage](#example-1-basic-usage)
|
||||
- [Example 2: Custom API Key](#example-2-custom-api-key)
|
||||
- [Example 3: Adjusting Maximum Tokens](#example-3-adjusting-maximum-tokens)
|
||||
- [Additional Information](#additional-information)
|
||||
- [References](#references)
|
||||
|
||||
## Introduction<a name="introduction"></a>
|
||||
|
||||
Welcome to the documentation for the `GPT4VisionAPI` module! This module is a powerful wrapper for the OpenAI GPT-4 Vision model. It allows you to interact with the model to generate descriptions or answers related to images. This documentation will provide you with comprehensive information on how to use this module effectively.
|
||||
|
||||
## Installation<a name="installation"></a>
|
||||
|
||||
Before you start using the `GPT4VisionAPI` module, make sure you have the required dependencies installed. You can install them using the following commands:
|
||||
|
||||
```bash
|
||||
pip3 install --upgrade swarms
|
||||
```
|
||||
|
||||
## Installation
|
||||
## Module Overview<a name="module-overview"></a>
|
||||
|
||||
To use the GPT4Vision Model API, you need to install the required dependencies and configure your environment. Follow these steps to get started:
|
||||
The `GPT4VisionAPI` module serves as a bridge between your application and the OpenAI GPT-4 Vision model. It allows you to send requests to the model and retrieve responses related to images. Here are some key features and functionality provided by this module:
|
||||
|
||||
1. Install the required Python package:
|
||||
- Encoding images to base64 format.
|
||||
- Running the GPT-4 Vision model with specified tasks and images.
|
||||
- Customization options such as setting the OpenAI API key and maximum token limit.
|
||||
|
||||
```bash
|
||||
pip3 install --upgrade swarms
|
||||
```
|
||||
## Class: GPT4VisionAPI<a name="class-gpt4visionapi"></a>
|
||||
|
||||
2. Make sure you have an OpenAI API key. You can obtain one by signing up on the [OpenAI platform](https://beta.openai.com/signup/).
|
||||
The `GPT4VisionAPI` class is the core component of this module. It encapsulates the functionality required to interact with the GPT-4 Vision model. Below, we'll dive into the class in detail.
|
||||
|
||||
3. Set your OpenAI API key as an environment variable. You can do this in your code or your environment configuration. Alternatively, you can provide the API key directly when initializing the `GPT4Vision` class.
|
||||
### Initialization<a name="initialization"></a>
|
||||
|
||||
## Initialization
|
||||
When initializing the `GPT4VisionAPI` class, you have the option to provide the OpenAI API key and set the maximum token limit. Here are the parameters and their descriptions:
|
||||
|
||||
To start using the GPT4Vision Model API, you need to create an instance of the `GPT4Vision` class. You can customize its behavior by providing various configuration options, but it also comes with sensible defaults.
|
||||
| Parameter | Type | Default Value | Description |
|
||||
|---------------------|----------|-------------------------------|----------------------------------------------------------------------------------------------------------|
|
||||
| openai_api_key | str | `OPENAI_API_KEY` environment variable (if available) | The OpenAI API key. If not provided, it defaults to the `OPENAI_API_KEY` environment variable. |
|
||||
| max_tokens | int | 300 | The maximum number of tokens to generate in the model's response. |
|
||||
|
||||
Here's how you can initialize the `GPT4Vision` class:
|
||||
Here's how you can initialize the `GPT4VisionAPI` class:
|
||||
|
||||
```python
|
||||
from swarms.models.gpt4v import GPT4Vision
|
||||
|
||||
gpt4vision = GPT4Vision(
|
||||
api_key="Your Key"
|
||||
)
|
||||
```
|
||||
|
||||
The above code initializes the `GPT4Vision` class with default settings. You can adjust these settings as needed.
|
||||
|
||||
## Methods
|
||||
|
||||
### `process_img`
|
||||
from swarms.models import GPT4VisionAPI
|
||||
|
||||
The `process_img` method is used to preprocess an image before sending it to the GPT-4 Vision model. It takes the image path as input and returns the processed image in a format suitable for API requests.
|
||||
# Initialize with default API key and max_tokens
|
||||
api = GPT4VisionAPI()
|
||||
|
||||
```python
|
||||
processed_img = gpt4vision.process_img(img_path)
|
||||
# Initialize with custom API key and max_tokens
|
||||
custom_api_key = "your_custom_api_key"
|
||||
api = GPT4VisionAPI(openai_api_key=custom_api_key, max_tokens=500)
|
||||
```
|
||||
|
||||
- `img_path` (str): The file path or URL of the image to be processed.
|
||||
### Methods<a name="methods"></a>
|
||||
|
||||
### `__call__`
|
||||
#### encode_image<a name="encode_image"></a>
|
||||
|
||||
The `__call__` method is the main method for interacting with the GPT-4 Vision model. It sends the image and tasks to the model and returns the generated response.
|
||||
This method allows you to encode an image from a URL to base64 format. It's a utility function used internally by the module.
|
||||
|
||||
```python
|
||||
response = gpt4vision(img, tasks)
|
||||
```
|
||||
|
||||
- `img` (Union[str, List[str]]): Either a single image URL or a list of image URLs to be used for the API request.
|
||||
- `tasks` (List[str]): A list of tasks or questions related to the image(s).
|
||||
|
||||
This method returns a `GPT4VisionResponse` object, which contains the generated answer.
|
||||
|
||||
### `run`
|
||||
def encode_image(img: str) -> str:
|
||||
"""
|
||||
Encode image to base64.
|
||||
|
||||
The `run` method is an alternative way to interact with the GPT-4 Vision model. It takes a single task and image URL as input and returns the generated response.
|
||||
Parameters:
|
||||
- img (str): URL of the image to encode.
|
||||
|
||||
```python
|
||||
response = gpt4vision.run(task, img)
|
||||
Returns:
|
||||
str: Base64 encoded image.
|
||||
"""
|
||||
```
|
||||
|
||||
- `task` (str): The task or question related to the image.
|
||||
- `img` (str): The image URL to be used for the API request.
|
||||
|
||||
This method simplifies interactions when dealing with a single task and image.
|
||||
|
||||
### `arun`
|
||||
#### run<a name="run"></a>
|
||||
|
||||
The `arun` method is an asynchronous version of the `run` method. It allows for asynchronous processing of API requests, which can be useful in certain scenarios.
|
||||
The `run` method is the primary way to interact with the GPT-4 Vision model. It sends a request to the model with a task and an image URL, and it returns the model's response.
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
def run(task: str, img: str) -> str:
|
||||
"""
|
||||
Run the GPT-4 Vision model.
|
||||
|
||||
async def main():
|
||||
response = await gpt4vision.arun(task, img)
|
||||
print(response)
|
||||
Parameters:
|
||||
- task (str): The task or question related to the image.
|
||||
- img (str): URL of the image to analyze.
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
Returns:
|
||||
str: The model's response.
|
||||
"""
|
||||
```
|
||||
|
||||
- `task` (str): The task or question related to the image.
|
||||
- `img` (str): The image URL to be used for the API request.
|
||||
|
||||
## Configuration Options
|
||||
|
||||
The `GPT4Vision` class provides several configuration options that allow you to customize its behavior:
|
||||
#### __call__<a name="__call__"></a>
|
||||
|
||||
- `max_retries` (int): The maximum number of retries to make to the API. Default: 3
|
||||
- `backoff_factor` (float): The backoff factor to use for exponential backoff. Default: 2.0
|
||||
- `timeout_seconds` (int): The timeout in seconds for the API request. Default: 10
|
||||
- `api_key` (str): The API key to use for the API request. Default: None (set via environment variable)
|
||||
- `quality` (str): The quality of the image to generate. Options: 'low' or 'high'. Default: 'low'
|
||||
- `max_tokens` (int): The maximum number of tokens to use for the API request. Default: 200
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Example 1: Generating Image Descriptions
|
||||
|
||||
```python
|
||||
gpt4vision = GPT4Vision()
|
||||
img = "https://example.com/image.jpg"
|
||||
tasks = ["Describe this image."]
|
||||
response = gpt4vision(img, tasks)
|
||||
print(response.answer)
|
||||
```
|
||||
|
||||
In this example, we create an instance of `GPT4Vision`, provide an image URL, and ask the model to describe the image. The response contains the generated description.
|
||||
|
||||
### Example 2: Custom Configuration
|
||||
The `__call__` method is a convenient way to run the GPT-4 Vision model. It has the same functionality as the `run` method.
|
||||
|
||||
```python
|
||||
custom_config = {
|
||||
"max_retries": 5,
|
||||
"timeout_seconds": 20,
|
||||
"quality": "high",
|
||||
"max_tokens": 300,
|
||||
}
|
||||
gpt4vision = GPT4Vision(**custom_config)
|
||||
img = "https://example.com/another_image.jpg"
|
||||
tasks = ["What objects can you identify in this image?"]
|
||||
response = gpt4vision(img, tasks)
|
||||
print(response.answer)
|
||||
```
|
||||
def __call__(task: str, img: str) -> str:
|
||||
"""
|
||||
Run the GPT-4 Vision model (callable).
|
||||
|
||||
In this example, we create an instance of `GPT4Vision` with custom configuration options. We set a higher timeout, request high-quality images, and allow more tokens in the response.
|
||||
Parameters:
|
||||
- task (str): The task or question related to the image.
|
||||
- img
|
||||
|
||||
### Example 3: Using the `run` Method
|
||||
(str): URL of the image to analyze.
|
||||
|
||||
```python
|
||||
gpt4vision = GPT4Vision()
|
||||
img = "https://example.com/image.jpg"
|
||||
task = "Describe this image in detail."
|
||||
response = gpt4vision.run(task, img)
|
||||
print(response)
|
||||
Returns:
|
||||
str: The model's response.
|
||||
"""
|
||||
```
|
||||
|
||||
In this example, we use the `run` method to simplify the interaction by providing a single task and image URL.
|
||||
## Examples<a name="examples"></a>
|
||||
|
||||
# Model Usage and Image Understanding
|
||||
Let's explore some usage examples of the `GPT4VisionAPI` module to better understand how to use it effectively.
|
||||
|
||||
The GPT-4 Vision model processes images in a unique way, allowing it to answer questions about both or each of the images independently. Here's an overview:
|
||||
### Example 1: Basic Usage<a name="example-1-basic-usage"></a>
|
||||
|
||||
| Purpose | Description |
|
||||
| --------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
|
||||
| Image Understanding | The model is shown two copies of the same image and can answer questions about both or each of the images independently. |
|
||||
In this example, we'll use the module with the default API key and maximum tokens to analyze an image.
|
||||
|
||||
# Image Detail Control
|
||||
```python
|
||||
from swarms.models import GPT4VisionAPI
|
||||
|
||||
You have control over how the model processes the image and generates textual understanding by using the `detail` parameter, which has two options: `low` and `high`.
|
||||
# Initialize with default API key and max_tokens
|
||||
api = GPT4VisionAPI()
|
||||
|
||||
| Detail | Description |
|
||||
| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| low | Disables the "high-res" model. The model receives a low-res 512 x 512 version of the image and represents the image with a budget of 65 tokens. Ideal for use cases not requiring high detail. |
|
||||
| high | Enables "high-res" mode. The model first sees the low-res image and then creates detailed crops of input images as 512px squares based on the input image size. Uses a total of 129 tokens. |
|
||||
# Define the task and image URL
|
||||
task = "What is the color of the object?"
|
||||
img = "https://i.imgur.com/2M2ZGwC.jpeg"
|
||||
|
||||
# Managing Images
|
||||
# Run the GPT-4 Vision model
|
||||
response = api.run(task, img)
|
||||
|
||||
To use the Chat Completions API effectively, you must manage the images you pass to the model. Here are some key considerations:
|
||||
# Print the model's response
|
||||
print(response)
|
||||
```
|
||||
|
||||
| Management Aspect | Description |
|
||||
| ------------------------- | ------------------------------------------------------------------------------------------------- |
|
||||
| Image Reuse | To pass the same image multiple times, include the image with each API request. |
|
||||
| Image Size Optimization | Improve latency by downsizing images to meet the expected size requirements. |
|
||||
| Image Deletion | After processing, images are deleted from OpenAI servers and not retained. No data is used for training. |
|
||||
### Example 2: Custom API Key<a name="example-2-custom-api-key"></a>
|
||||
|
||||
# Limitations
|
||||
If you have a custom API key, you can initialize the module with it as shown in this example.
|
||||
|
||||
While GPT-4 with Vision is powerful, it has some limitations:
|
||||
```python
|
||||
from swarms.models import GPT4VisionAPI
|
||||
|
||||
| Limitation | Description |
|
||||
| -------------------------------------------- | --------------------------------------------------------------------------------------------------- |
|
||||
| Medical Images | Not suitable for interpreting specialized medical images like CT scans. |
|
||||
| Non-English Text | May not perform optimally when handling non-Latin alphabets, such as Japanese or Korean. |
|
||||
| Large Text in Images | Enlarge text within images for readability, but avoid cropping important details. |
|
||||
| Rotated or Upside-Down Text/Images | May misinterpret rotated or upside-down text or images. |
|
||||
| Complex Visual Elements | May struggle to understand complex graphs or text with varying colors or styles. |
|
||||
| Spatial Reasoning | Struggles with tasks requiring precise spatial localization, such as identifying chess positions. |
|
||||
| Accuracy | May generate incorrect descriptions or captions in certain scenarios. |
|
||||
| Panoramic and Fisheye Images | Struggles with panoramic and fisheye images. |
|
||||
# Initialize with custom API key and max_tokens
|
||||
custom_api_key = "your_custom_api_key"
|
||||
api = GPT4VisionAPI(openai_api_key=custom_api_key, max_tokens=500)
|
||||
|
||||
# Calculating Costs
|
||||
# Define the task and image URL
|
||||
task = "What is the object in the image?"
|
||||
img = "https://i.imgur.com/3T3ZHwD.jpeg"
|
||||
|
||||
Image inputs are metered and charged in tokens. The token cost depends on the image size and detail option.
|
||||
# Run the GPT-4 Vision model
|
||||
response = api.run(task, img)
|
||||
|
||||
| Example | Token Cost |
|
||||
| --------------------------------------------- | ----------- |
|
||||
| 1024 x 1024 square image in detail: high mode | 765 tokens |
|
||||
| 2048 x 4096 image in detail: high mode | 1105 tokens |
|
||||
| 4096 x 8192 image in detail: low mode | 85 tokens |
|
||||
# Print the model's response
|
||||
print(response)
|
||||
```
|
||||
|
||||
# FAQ
|
||||
### Example 3: Adjusting Maximum Tokens<a name="example-3-adjusting-maximum-tokens"></a>
|
||||
|
||||
Here are some frequently asked questions about GPT-4 with Vision:
|
||||
You can also customize the maximum token limit when initializing the module. In this example, we set it to 1000 tokens.
|
||||
|
||||
| Question | Answer |
|
||||
| -------------------------------------------- | -------------------------------------------------------------------------------------------------- |
|
||||
| Fine-Tuning Image Capabilities | No, fine-tuning the image capabilities of GPT-4 is not supported at this time. |
|
||||
| Generating Images | GPT-4 is used for understanding images, not generating them. |
|
||||
| Supported Image File Types | Supported image file types include PNG (.png), JPEG (.jpeg and .jpg), WEBP (.webp), and non-animated GIF (.gif). |
|
||||
| Image Size Limitations | Image uploads are restricted to 20MB per image. |
|
||||
| Image Deletion | Uploaded images are automatically deleted after processing by the model. |
|
||||
| Learning More | For more details about GPT-4 with Vision, refer to the GPT-4 with Vision system card. |
|
||||
| CAPTCHA Submission | CAPTCHAs are blocked for safety reasons. |
|
||||
| Rate Limits | Image processing counts toward your tokens per minute (TPM) limit. Refer to the calculating costs section for details. |
|
||||
| Image Metadata | The model does not receive image metadata. |
|
||||
| Handling Unclear Images | If an image is unclear, the model will do its best to interpret it, but results may be less accurate. |
|
||||
```python
|
||||
from swarms.models import GPT4VisionAPI
|
||||
|
||||
# Initialize with default API key and custom max_tokens
|
||||
api = GPT4VisionAPI(max_tokens=1000)
|
||||
|
||||
# Define the task and image URL
|
||||
task = "Describe the scene in the image."
|
||||
img = "https://i.imgur.com/4P4ZRxU.jpeg"
|
||||
|
||||
## Additional Tips
|
||||
# Run the GPT-4 Vision model
|
||||
response = api.run(task, img)
|
||||
|
||||
- Make sure to handle potential exceptions and errors when making API requests. The library includes retries and error handling, but it's essential to handle exceptions gracefully in your code.
|
||||
- Experiment with different configuration options to optimize the trade-off between response quality and response time based on your specific requirements.
|
||||
# Print the model's response
|
||||
print(response)
|
||||
```
|
||||
|
||||
## References and Resources
|
||||
## Additional Information<a name="additional-information"></a>
|
||||
|
||||
- [OpenAI Platform](https://beta.openai.com/signup/): Sign up for an OpenAI API key.
|
||||
- [OpenAI API Documentation](https://platform.openai.com/docs/api-reference/chat/create): Official API documentation for the GPT-4 Vision model.
|
||||
- If you encounter any errors or issues with the module, make sure to check your API key and internet connectivity.
|
||||
- It's recommended to handle exceptions when using the module to gracefully handle errors.
|
||||
- You can further customize the module to fit your specific use case by modifying the code as needed.
|
||||
|
||||
Now you have a comprehensive understanding of the GPT4Vision Model API, its configuration options, and how to use it for various computer vision and natural language processing tasks. Start experimenting and integrating it into your projects to leverage the power of GPT-4 Vision for image-related tasks.
|
||||
## References<a name="references"></a>
|
||||
|
||||
# Conclusion
|
||||
- [OpenAI API Documentation](https://beta.openai.com/docs/)
|
||||
|
||||
With GPT-4 Vision, you have a powerful tool for understanding and generating textual descriptions for images. By considering its capabilities, limitations, and cost calculations, you can effectively leverage this model for various image-related tasks.
|
||||
This documentation provides a comprehensive guide on how to use the `GPT4VisionAPI` module effectively. It covers initialization, methods, usage examples, and additional information to ensure a smooth experience when working with the GPT-4 Vision model.
|
@ -1,37 +1,33 @@
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Import the OpenAIChat model and the Agent struct
|
||||
from swarms.models import OpenAIChat
|
||||
from swarms.structs import Flow
|
||||
from swarms.structs import Agent
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the language model, this model can be swapped out with Anthropic, ETC, Huggingface Models like Mistral, ETC
|
||||
# Get the API key from the environment
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
# Initialize the language model
|
||||
llm = OpenAIChat(
|
||||
# model_name="gpt-4"
|
||||
# openai_api_key=api_key,
|
||||
temperature=0.5,
|
||||
# max_tokens=100,
|
||||
model_name="gpt-4",
|
||||
openai_api_key=api_key,
|
||||
)
|
||||
|
||||
|
||||
## Initialize the workflow
|
||||
flow = Flow(
|
||||
agent = Agent(
|
||||
llm=llm,
|
||||
max_loops=2,
|
||||
max_loops=1,
|
||||
autosave=True,
|
||||
dashboard=True,
|
||||
# tools=[search_api]
|
||||
# stopping_condition=None, # You can define a stopping condition as needed.
|
||||
# loop_interval=1,
|
||||
# retry_attempts=3,
|
||||
# retry_interval=1,
|
||||
# interactive=False, # Set to 'True' for interactive mode.
|
||||
# dynamic_temperature=False, # Set to 'True' for dynamic temperature handling.
|
||||
)
|
||||
|
||||
# out = flow.load_state("flow_state.json")
|
||||
# temp = flow.dynamic_temperature()
|
||||
# filter = flow.add_response_filter("Trump")
|
||||
out = flow.run(
|
||||
"Generate a 10,000 word blog on mental clarity and the benefits of meditation."
|
||||
)
|
||||
# out = flow.validate_response(out)
|
||||
# out = flow.analyze_feedback(out)
|
||||
# out = flow.print_history_and_memory()
|
||||
# # out = flow.save_state("flow_state.json")
|
||||
# print(out)
|
||||
# Run the workflow on a task
|
||||
out = agent.run("Generate a 10,000 word blog on health and wellness.")
|
||||
print(out)
|
||||
|
@ -0,0 +1,34 @@
|
||||
# Description: This is an example of how to use the Agent class to run a multi-modal workflow
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from swarms.models.gpt4_vision_api import GPT4VisionAPI
|
||||
from swarms.structs import Agent
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Get the API key from the environment
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
# Initialize the language model
|
||||
llm = GPT4VisionAPI(
|
||||
openai_api_key=api_key,
|
||||
max_tokens=500,
|
||||
)
|
||||
|
||||
# Initialize the language model
|
||||
task = "What is the color of the object?"
|
||||
img = "images/swarms.jpeg"
|
||||
|
||||
## Initialize the workflow
|
||||
agent = Agent(
|
||||
llm=llm,
|
||||
max_loops="auto",
|
||||
autosave=True,
|
||||
dashboard=True,
|
||||
multi_modal=True,
|
||||
)
|
||||
|
||||
# Run the workflow on a task
|
||||
out = agent.run(task=task, img=img)
|
||||
print(out)
|
@ -1,22 +0,0 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from swarms.models.revgptV4 import RevChatGPTModel
|
||||
from swarms.workers.worker import Worker
|
||||
|
||||
load_dotenv()
|
||||
|
||||
config = {
|
||||
"model": os.getenv("REVGPT_MODEL"),
|
||||
"plugin_ids": [os.getenv("REVGPT_PLUGIN_IDS")],
|
||||
"disable_history": os.getenv("REVGPT_DISABLE_HISTORY") == "True",
|
||||
"PUID": os.getenv("REVGPT_PUID"),
|
||||
"unverified_plugin_domains": [os.getenv("REVGPT_UNVERIFIED_PLUGIN_DOMAINS")],
|
||||
}
|
||||
|
||||
llm = RevChatGPTModel(access_token=os.getenv("ACCESS_TOKEN"), **config)
|
||||
|
||||
worker = Worker(ai_name="Optimus Prime", llm=llm)
|
||||
|
||||
task = "What were the winning boston marathon times for the past 5 years (ending in 2022)? Generate a table of the year, name, country of origin, and times."
|
||||
response = worker.run(task)
|
||||
print(response)
|
@ -0,0 +1,102 @@
|
||||
import random
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from swarms.models import OpenAIChat
|
||||
from swarms.structs import Agent
|
||||
from swarms.models.stable_diffusion import StableDiffusion
|
||||
|
||||
load_dotenv()
|
||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
stability_api_key = os.getenv("STABILITY_API_KEY")
|
||||
|
||||
# Initialize the language model and image generation model
# Temperature 0.5 keeps the ad copy creative but on-brief; 3000 tokens
# leaves room for long-form copy.
llm = OpenAIChat(
    openai_api_key=openai_api_key, temperature=0.5, max_tokens=3000
)
# Stability AI client used to render the product shot.
sd_api = StableDiffusion(api_key=stability_api_key)
|
||||
|
||||
|
||||
# Creative Concept Generator for Product Ads
class ProductAdConceptGenerator:
    """Generate a one-line creative direction for a product ad by combining
    a randomly chosen visual theme with a randomly chosen context."""

    def __init__(self, product_name):
        # Product the concept is written for; interpolated into the output.
        self.product_name = product_name
        # Visual/stylistic themes to sample from.
        self.themes = [
            "futuristic",
            "rustic",
            "luxurious",
            "minimalistic",
            "vibrant",
            "elegant",
            "retro",
            "urban",
            "ethereal",
            "surreal",
            "artistic",
            "tech-savvy",
            "vintage",
            "natural",
            "sophisticated",
            "playful",
            "dynamic",
            "serene",
            "lasers,lightning",
        ]
        # Scene contexts to sample from.
        # BUG FIX: the original assigned self.contexts twice, so this rich
        # list was immediately discarded in favour of a single-entry list.
        # The "high realism" entry is kept as one more option instead.
        self.contexts = [
            "in an everyday setting",
            "in a rave setting",
            "in an abstract environment",
            "in an adventurous context",
            "surrounded by nature",
            "in a high-tech setting",
            "in a historical context",
            "in a busy urban scene",
            "in a tranquil and peaceful setting",
            "against a backdrop of city lights",
            "in a surreal dreamscape",
            "in a festive atmosphere",
            "in a luxurious setting",
            "in a playful and colorful background",
            "in an ice cave setting",
            "in a serene and calm landscape",
            "high realism product ad (extremely creative)",
        ]

    def generate_concept(self):
        """Return a concept string such as
        'futuristic inside a PS5, in a surreal dreamscape'."""
        theme = random.choice(self.themes)
        context = random.choice(self.contexts)
        # BUG FIX: the original f-string referenced an undefined name
        # `style`, raising NameError on every call.
        return f"{theme} inside a {self.product_name}, {context}"
|
||||
|
||||
|
||||
# User input
product_name = input(
    "Enter a product name for ad creation (e.g., 'PS5', 'AirPods',"
    " 'Kirkland Vodka'): "
)
# BUG FIX: social_media_platform was referenced in the ad-copy prompt below
# but never defined, raising NameError. Ask the user for it.
social_media_platform = input(
    "Enter a social media platform (e.g., 'Instagram', 'Twitter'): "
)

# Generate creative concept
concept_generator = ProductAdConceptGenerator(product_name)
creative_concept = concept_generator.generate_concept()

# Generate product image based on the creative concept
image_paths = sd_api.run(creative_concept)

# Generate ad copy
ad_copy_agent = Agent(llm=llm, max_loops=1)
ad_copy_prompt = (
    f"Write a compelling {social_media_platform} ad copy for a"
    f" product photo showing {product_name} {creative_concept}."
)
ad_copy = ad_copy_agent.run(task=ad_copy_prompt)

# Output the results
# BUG FIX: the original printed undefined names (concept_result,
# design_result, copywriting_result); print the values computed above.
print("Creative Concept:", creative_concept)
print("Ad Copy:", ad_copy)
print(
    "Image Path:",
    image_paths[0] if image_paths else "No image generated",
)
|
@ -0,0 +1,24 @@
|
||||
from swarms.structs import Agent
from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)

# Vision model that inspects the assembly-line photo.
llm = GPT4VisionAPI()

# NOTE(review): MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 is imported but never
# passed to the agent (e.g. as sop=) — confirm whether that was intended.
task = (
    "Analyze this image of an assembly line and identify any issues"
    " such as misaligned parts, defects, or deviations from the"
    " standard assembly process. IF there is anything unsafe in the"
    " image, explain why it is unsafe and how it could be improved."
)
img = "assembly_line.jpg"

# Single-pass inspection agent with the dashboard enabled.
agent = Agent(llm=llm, max_loops=1, dashboard=True)

agent.run(task=task, img=img)
|
After Width: | Height: | Size: 532 KiB |
@ -0,0 +1,90 @@
|
||||
import re
|
||||
from swarms.models.openai_models import OpenAIChat
|
||||
|
||||
|
||||
class AutoTemp:
    """
    AutoTemp is a tool for automatically selecting the best temperature setting for a given task.
    It generates responses at different temperatures, evaluates them, and ranks them based on quality.
    """

    def __init__(
        self,
        api_key,
        default_temp=0.0,
        alt_temps=None,
        auto_select=True,
        max_workers=6,
    ):
        """Configure the selector.

        Args:
            api_key: OpenAI API key used for both generation and grading.
            default_temp: temperature for the shared model instance.
            alt_temps: candidate temperatures; defaults to 0.4–1.4 in steps.
            auto_select: if True, run() returns only the best output.
            max_workers: intended parallelism (currently unused — see NOTE).
        """
        self.api_key = api_key
        self.default_temp = default_temp
        self.alt_temps = (
            alt_temps if alt_temps else [0.4, 0.6, 0.8, 1.0, 1.2, 1.4]
        )
        self.auto_select = auto_select
        # NOTE(review): max_workers is stored but never read — generation in
        # run() is sequential. Confirm whether a thread pool was intended.
        self.max_workers = max_workers
        # Single shared model; per-call temperature overrides are passed at
        # call time in run() and evaluate_output().
        self.llm = OpenAIChat(
            openai_api_key=self.api_key, temperature=self.default_temp
        )

    def evaluate_output(self, output, temperature):
        """Ask the LLM to grade *output* and return a 0.0–100.0 score.

        The grading call runs at temperature 0.5 for stability. Returns 0.0
        when no number can be parsed out of the model's reply.
        """
        print(f"Evaluating output at temperature {temperature}...")
        eval_prompt = f"""
        Evaluate the following output which was generated at a temperature setting of {temperature}. Provide a precise score from 0.0 to 100.0, considering the following criteria:

        - Relevance: How well does the output address the prompt or task at hand?
        - Clarity: Is the output easy to understand and free of ambiguity?
        - Utility: How useful is the output for its intended purpose?
        - Pride: If the user had to submit this output to the world for their career, would they be proud?
        - Delight: Is the output likely to delight or positively surprise the user?

        Be sure to comprehensively evaluate the output, it is very important for my career. Please answer with just the score with one decimal place accuracy, such as 42.0 or 96.9. Be extremely critical.

        Output to evaluate:
        ---
        {output}
        ---
        """
        score_text = self.llm(eval_prompt, temperature=0.5)
        # First number (optionally with one decimal digit) in the reply is
        # taken as the score.
        score_match = re.search(r"\b\d+(\.\d)?\b", score_text)
        return (
            round(float(score_match.group()), 1)
            if score_match
            else 0.0
        )

    def run(self, prompt, temperature_string):
        """Generate *prompt* at each comma-separated temperature and rank.

        Args:
            prompt: the task to generate.
            temperature_string: e.g. "0.4,0.6,0.8"; empty entries ignored.

        Returns:
            A formatted string with the best output (when auto_select) or
            all outputs sorted by score; on total failure returns the tuple
            ("No valid outputs generated.", None).
        """
        print("Starting generation process...")
        temperature_list = [
            float(temp.strip())
            for temp in temperature_string.split(",")
            if temp.strip()
        ]
        outputs = {}
        scores = {}
        for temp in temperature_list:
            print(f"Generating at temperature {temp}...")
            output_text = self.llm(prompt, temperature=temp)
            if output_text:
                outputs[temp] = output_text
                scores[temp] = self.evaluate_output(output_text, temp)

        print("Generation process complete.")
        if not scores:
            return "No valid outputs generated.", None

        # Highest score first.
        sorted_scores = sorted(
            scores.items(), key=lambda item: item[1], reverse=True
        )
        best_temp, best_score = sorted_scores[0]
        best_output = outputs[best_temp]

        return (
            f"Best AutoTemp Output (Temp {best_temp} | Score:"
            f" {best_score}):\n{best_output}"
            if self.auto_select
            else "\n".join(
                f"Temp {temp} | Score: {score}:\n{outputs[temp]}"
                for temp, score in sorted_scores
            )
        )
|
@ -0,0 +1,22 @@
|
||||
from swarms.models import OpenAIChat
from autotemp import AutoTemp

# Your OpenAI API key
api_key = ""

# Rank all five temperatures instead of auto-selecting a single winner.
autotemp_agent = AutoTemp(
    api_key=api_key,
    alt_temps=[0.4, 0.6, 0.8, 1.0, 1.2],
    auto_select=False,
    # model_version="gpt-3.5-turbo" # Specify the model version if needed
)

# Task plus the comma-separated temperatures to sample; the trailing comma
# is tolerated because AutoTemp.run skips empty entries.
task = "Generate a short story about a lost civilization."
temperature_string = "0.4,0.6,0.8,1.0,1.2,"

# Run the AutoTempAgent and show the ranked outputs.
result = autotemp_agent.run(task, temperature_string)
print(result)
|
@ -0,0 +1,138 @@
|
||||
import os
|
||||
from termcolor import colored
|
||||
from swarms.models import OpenAIChat
|
||||
from autotemp import AutoTemp
|
||||
from swarms.structs import SequentialWorkflow
|
||||
|
||||
|
||||
class BlogGen:
    """Four-stage blog pipeline: topic selection, drafting (scored across a
    temperature range by AutoTemp), critical review, and a distribution
    plan, printing every stage's output along the way."""

    def __init__(
        self,
        api_key,
        blog_topic,
        temperature_range: str = "0.4,0.6,0.8,1.0,1.2",
    ):  # Add blog_topic as an argument
        # Chat model used directly for topic selection, review and
        # distribution; temperature 0.8 keeps those stages creative.
        self.openai_chat = OpenAIChat(
            openai_api_key=api_key, temperature=0.8
        )
        # AutoTemp generates the draft at each temperature in
        # temperature_range and keeps the best-scoring one.
        self.auto_temp = AutoTemp(api_key)
        self.temperature_range = temperature_range
        # NOTE(review): this workflow object is never used in run_workflow —
        # confirm whether it can be removed.
        self.workflow = SequentialWorkflow(max_loops=5)

        # Formatting the topic selection prompt with the user's topic
        self.TOPIC_SELECTION_SYSTEM_PROMPT = f"""
        Given the topic '{blog_topic}', generate an engaging and versatile blog topic. This topic should cover areas related to '{blog_topic}' and might include aspects such as current events, lifestyle, technology, health, and culture related to '{blog_topic}'. Identify trending subjects within this realm. The topic must be unique, thought-provoking, and have the potential to draw in readers interested in '{blog_topic}'.
        """

        # The {{CHOSEN_TOPIC}} / {{ARTICLE_TOPIC}} placeholders in the plain
        # (non-f) strings below are substituted with str.replace() inside
        # run_workflow.
        self.DRAFT_WRITER_SYSTEM_PROMPT = """
        Create an engaging and comprehensive blog article of at least 1,000 words on '{{CHOSEN_TOPIC}}'. The content should be original, informative, and reflective of a human-like style, with a clear structure including headings and sub-headings. Incorporate a blend of narrative, factual data, expert insights, and anecdotes to enrich the article. Focus on SEO optimization by using relevant keywords, ensuring readability, and including meta descriptions and title tags. The article should provide value, appeal to both knowledgeable and general readers, and maintain a balance between depth and accessibility. Aim to make the article engaging and suitable for online audiences.
        """

        self.REVIEW_AGENT_SYSTEM_PROMPT = """
        Critically review the drafted blog article on '{{ARTICLE_TOPIC}}' to refine it to high-quality content suitable for online publication. Ensure the article is coherent, factually accurate, engaging, and optimized for search engines (SEO). Check for the effective use of keywords, readability, internal and external links, and the inclusion of meta descriptions and title tags. Edit the content to enhance clarity, impact, and maintain the authors voice. The goal is to polish the article into a professional, error-free piece that resonates with the target audience, adheres to publication standards, and is optimized for both search engines and social media sharing.
        """

        self.DISTRIBUTION_AGENT_SYSTEM_PROMPT = """
        Develop an autonomous distribution strategy for the blog article on '{{ARTICLE_TOPIC}}'. Utilize an API to post the article on a popular blog platform (e.g., WordPress, Blogger, Medium) commonly used by our target audience. Ensure the post includes all SEO elements like meta descriptions, title tags, and properly formatted content. Craft unique, engaging social media posts tailored to different platforms to promote the blog article. Schedule these posts to optimize reach and engagement, using data-driven insights. Monitor the performance of the distribution efforts, adjusting strategies based on engagement metrics and audience feedback. Aim to maximize the article's visibility, attract a diverse audience, and foster engagement across digital channels.
        """

    def run_workflow(self):
        """Run all four stages, printing each stage's output; any exception
        is caught and reported rather than propagated."""
        try:
            # Topic generation using OpenAIChat
            topic_result = self.openai_chat.generate(
                [self.TOPIC_SELECTION_SYSTEM_PROMPT]
            )
            topic_output = topic_result.generations[0][0].text
            print(
                colored(
                    (
                        "\nTopic Selection Task"
                        f" Output:\n----------------------------\n{topic_output}\n"
                    ),
                    "white",
                )
            )

            # The first line of the model's reply is taken as the topic.
            chosen_topic = topic_output.split("\n")[0]
            print(
                colored("Selected topic: " + chosen_topic, "yellow")
            )

            # Initial draft generation with AutoTemp
            initial_draft_prompt = (
                self.DRAFT_WRITER_SYSTEM_PROMPT.replace(
                    "{{CHOSEN_TOPIC}}", chosen_topic
                )
            )
            auto_temp_output = self.auto_temp.run(
                initial_draft_prompt, self.temperature_range
            )
            initial_draft_output = auto_temp_output  # Assuming AutoTemp.run returns the best output directly
            print(
                colored(
                    (
                        "\nInitial Draft"
                        f" Output:\n----------------------------\n{initial_draft_output}\n"
                    ),
                    "white",
                )
            )

            # Review process using OpenAIChat
            review_prompt = self.REVIEW_AGENT_SYSTEM_PROMPT.replace(
                "{{ARTICLE_TOPIC}}", chosen_topic
            )
            review_result = self.openai_chat.generate([review_prompt])
            review_output = review_result.generations[0][0].text
            print(
                colored(
                    (
                        "\nReview"
                        f" Output:\n----------------------------\n{review_output}\n"
                    ),
                    "white",
                )
            )

            # Distribution preparation using OpenAIChat
            distribution_prompt = (
                self.DISTRIBUTION_AGENT_SYSTEM_PROMPT.replace(
                    "{{ARTICLE_TOPIC}}", chosen_topic
                )
            )
            distribution_result = self.openai_chat.generate(
                [distribution_prompt]
            )
            distribution_output = distribution_result.generations[0][
                0
            ].text
            print(
                colored(
                    (
                        "\nDistribution"
                        f" Output:\n----------------------------\n{distribution_output}\n"
                    ),
                    "white",
                )
            )

            # Final compilation of the blog
            final_blog_content = f"{initial_draft_output}\n\n{review_output}\n\n{distribution_output}"
            print(
                colored(
                    (
                        "\nFinal Blog"
                        f" Content:\n----------------------------\n{final_blog_content}\n"
                    ),
                    "green",
                )
            )

        except Exception as e:
            print(colored(f"An error occurred: {str(e)}", "red"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    api_key = os.environ["OPENAI_API_KEY"]
    # BUG FIX: BlogGen.__init__ requires a blog_topic argument; the original
    # call BlogGen(api_key) raised TypeError. Supply a topic, overridable
    # via the BLOG_TOPIC environment variable.
    blog_topic = os.environ.get("BLOG_TOPIC", "technology")
    blog_generator = BlogGen(api_key, blog_topic)
    blog_generator.run_workflow()
|
@ -0,0 +1,25 @@
|
||||
import os
from blog_gen import BlogGen


def main():
    """Prompt for a blog topic and run the BlogGen workflow end to end.

    Raises:
        ValueError: if OPENAI_API_KEY is not set in the environment.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError(
            "OPENAI_API_KEY environment variable not set."
        )

    blog_topic = input("Enter the topic for the blog generation: ")

    blog_generator = BlogGen(api_key, blog_topic)
    # NOTE: the original code then replaced "{{BLOG_TOPIC}}" inside
    # TOPIC_SELECTION_SYSTEM_PROMPT, but that placeholder never occurs —
    # BlogGen interpolates the topic via an f-string — so the no-op
    # substitution has been removed.
    blog_generator.run_workflow()


if __name__ == "__main__":
    main()
|
@ -0,0 +1,63 @@
|
||||
"""
Swarm of developers that write documentation and tests for a given code snippet.

This is a simple example of how to use the swarms library to create a swarm of developers that write documentation and tests for a given code snippet.

The swarm is composed of two agents:
- Documentation agent: writes documentation for a given code snippet.
- Tests agent: writes tests for a given code snippet.

The swarm is initialized with a language model that is used by the agents to generate text. In this example, we use the OpenAI GPT-3 language model.

Agent:
Documentation agent -> Tests agent

"""
import os

from dotenv import load_dotenv

from swarms.models import OpenAIChat
from swarms.prompts.programming import DOCUMENTATION_SOP, TEST_SOP
from swarms.structs import Agent

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Placeholder snippet the agents will document and test.
TASK = """
CODE

"""

# Shared language model with a large completion budget.
llm = OpenAIChat(openai_api_key=api_key, max_tokens=5000)

# Writer of docs (single pass) and writer of tests (two passes).
documentation_agent = Agent(llm=llm, sop=DOCUMENTATION_SOP, max_loops=1)
tests_agent = Agent(llm=llm, sop=TEST_SOP, max_loops=2)

# Documentation first, then tests grounded in that documentation.
documentation = documentation_agent.run(
    f"Write documentation for the following code:{TASK}"
)
tests = tests_agent.run(
    f"Write tests for the following code:{TASK} here is the"
    f" documentation: {documentation}"
)
|
@ -0,0 +1,76 @@
|
||||
import os
from dotenv import load_dotenv
from swarms.models import OpenAIChat
from swarms.models.stable_diffusion import StableDiffusion
from swarms.structs import Agent, SequentialWorkflow
import swarms.prompts.education as edu_prompts

# Load environment variables
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
stability_api_key = os.getenv("STABILITY_API_KEY")

# Shared text model and Stable Diffusion client.
llm = OpenAIChat(openai_api_key=api_key, temperature=0.5, max_tokens=3000)
sd_api = StableDiffusion(api_key=stability_api_key)

# User preferences (can be dynamically set in a real application)
user_preferences = {
    "subjects": "Cognitive Architectures",
    "learning_style": "Visual",
    "challenge_level": "Moderate",
}

# Fill each prompt template with the user's preferences.
curriculum_prompt = edu_prompts.CURRICULUM_DESIGN_PROMPT.format(**user_preferences)
interactive_prompt = edu_prompts.INTERACTIVE_LEARNING_PROMPT.format(**user_preferences)
sample_prompt = edu_prompts.SAMPLE_TEST_PROMPT.format(**user_preferences)
image_prompt = edu_prompts.IMAGE_GENERATION_PROMPT.format(**user_preferences)

# One single-loop agent per educational task.
curriculum_agent = Agent(llm=llm, max_loops=1, sop=curriculum_prompt)
interactive_learning_agent = Agent(llm=llm, max_loops=1, sop=interactive_prompt)
sample_lesson_agent = Agent(llm=llm, max_loops=1, sop=sample_prompt)

# Chain the text tasks into a sequential workflow and run them.
workflow = SequentialWorkflow(max_loops=1)
workflow.add(curriculum_agent, "Generate a curriculum")
workflow.add(interactive_learning_agent, "Generate an interactive lesson")
workflow.add(sample_lesson_agent, "Generate a practice test")
workflow.run()

# Render an illustration for the lesson with Stable Diffusion.
image_result = sd_api.run(image_prompt)

# Report the result of every workflow task.
for task in workflow.tasks:
    print(f"Task Description: {task.description}\nResult: {task.result}\n")

# Output image result
print(
    "Image Generation Task: Generate an image for the interactive"
    f" lesson\nResult: {image_result}"
)
|
@ -0,0 +1,149 @@
|
||||
import os

from dotenv import load_dotenv

from swarms.models import OpenAIChat
from swarms.prompts.code_interpreter import CODE_INTERPRETER
from swarms.structs import Agent
from swarms.prompts.programming import TEST_SOP, DOCUMENTATION_SOP
from termcolor import colored

load_dotenv()

# The feature to build and the codebase it must integrate with.
FEATURE = (
    "Implement an all-new signup system in typescript using supabase"
)

CODEBASE = """
import React, { useState } from 'react';
import UpperPanel from './UpperPanel';
import LowerPanel from './LowerPanel';

const MainPanel = () => {
const [promptInstructionForLowerPanel, setPromptInstructionForLowerPanel] = useState('');
const [formData, setFormData] = useState('');
const [isLoading, setIsLoading] = useState(false);

return (
<div className="flex h-screen">
<UpperPanel setPromptInstructionForLowerPanel={setPromptInstructionForLowerPanel}
isLoading={isLoading}
setIsLoading={setIsLoading}
/>
<LowerPanel promptInstruction={promptInstructionForLowerPanel} isLoading={isLoading} />
</div>
);
};

export default MainPanel;


"""

# Load the environment variables
api_key = os.getenv("OPENAI_API_KEY")

# GPT-4 language model shared by every agent below.
llm = OpenAIChat(
    model_name="gpt-4",
    openai_api_key=api_key,
    temperature=0.5,
    max_tokens=4000,
)

# One agent per role in the mini development team; all autosave their runs.
product_manager_agent = Agent(llm=llm, max_loops=1, sop=CODE_INTERPRETER, autosave=True)
feature_implementer_frontend = Agent(llm=llm, max_loops=1, sop=CODE_INTERPRETER, autosave=True)
feature_implementer_backend = Agent(llm=llm, max_loops=1, sop=CODE_INTERPRETER, autosave=True)
tester_agent = Agent(llm=llm, max_loops=1, sop=TEST_SOP, autosave=True)
documenting_agent = Agent(llm=llm, max_loops=1, sop=DOCUMENTATION_SOP, autosave=True)
||||
|
||||
|
||||
# Product Agent prompt
def feature_codebase_product_agentprompt(
    feature: str, codebase: str
) -> str:
    """Build the product-manager prompt asking for algorithmic pseudocode
    for *feature* grounded in *codebase*."""
    return (
        "Create an algorithmic pseudocode for an all-new feature:"
        f" {feature} based on this codebase: {codebase}"
    )
|
||||
|
||||
|
||||
# Pipeline: plan -> implement -> test -> document, echoing each stage.
def _report(text, color):
    """Print a stage banner/result via termcolor."""
    print(colored(text, color))


# Product Manager Agent: pseudocode plan for the feature.
product_manager_out = product_manager_agent.run(
    feature_codebase_product_agentprompt(FEATURE, CODEBASE)
)
_report(
    "---------------------------- Product Manager Plan:"
    f" {product_manager_out}",
    "cyan",
)

# Feature Implementer Agent: turn the plan into code.
agent1_out = feature_implementer_frontend.run(
    f"Create the backend code for {FEATURE} in markdown based off of"
    f" this algorithmic pseudocode: {product_manager_out} the logic"
    f" based on the following codebase: {CODEBASE}"
)
_report(
    "--------------------- Feature Implementer Code logic:"
    f" {agent1_out}",
    "cyan",
)

# Tester agent: write tests for the generated code.
tester_agent_out = tester_agent.run(
    f"Create tests for the following code: {agent1_out}"
)
_report(
    "---------------------------- Tests for the logic:"
    f" {tester_agent_out}",
    "green",
)

# Documentation Agent: document the generated code.
documenter_agent_out = documenting_agent.run(
    f"Document the following code: {agent1_out}"
)
_report(
    "---------------------------- Documentation for the"
    f" logic: {documenter_agent_out}",
    "yellow",
)
|
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Smoke-test the /agent/ endpoint of a locally running API server by POSTing
# a sample feature/codebase payload and echoing the response.

# Define the base URL
base_url="http://localhost:8000"

# Define the JSON payload
payload='{"feature": "login system", "codebase": "existing codebase here"}'

# Send POST request
echo "Sending request to /agent/ endpoint..."
response=$(curl -s -X POST "$base_url/agent/" -H "Content-Type: application/json" -d "$payload")

echo "Response: $response"
|
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Smoke-test the /agent/ endpoint of a locally running API server.
# NOTE(review): this script is byte-identical to the other /agent/ smoke
# test in this change set — confirm whether the duplicate is intentional.

# Define the base URL
base_url="http://localhost:8000"

# Define the JSON payload
payload='{"feature": "login system", "codebase": "existing codebase here"}'

# Send POST request
echo "Sending request to /agent/ endpoint..."
response=$(curl -s -X POST "$base_url/agent/" -H "Content-Type: application/json" -d "$payload")

echo "Response: $response"
|
@ -0,0 +1,20 @@
|
||||
from swarms.structs import Agent
from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)

# Vision-capable model that both sees the image and answers the task.
model = GPT4VisionAPI()

question = "What is the color of the object?"
image_path = "images/swarms.jpeg"

# Autonomous multi-modal agent driven by the multi-modal SOP; "auto" lets
# the agent loop until its own stopping condition fires.
auto_agent = Agent(
    llm=model,
    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
    max_loops="auto",
)

auto_agent.run(task=question, img=image_path)
|
After Width: | Height: | Size: 1.6 MiB |
@ -0,0 +1,85 @@
|
||||
from swarms.structs import Agent
import os
from dotenv import load_dotenv
from swarms.models import GPT4VisionAPI
from swarms.prompts.logistics import (
    Health_Security_Agent_Prompt,
    Quality_Control_Agent_Prompt,
    Productivity_Agent_Prompt,
    Safety_Agent_Prompt,
    Security_Agent_Prompt,
    Sustainability_Agent_Prompt,
    Efficiency_Agent_Prompt,
)

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Shared vision-capable model for all inspection agents.
llm = GPT4VisionAPI(openai_api_key=api_key)

# Image for analysis
factory_image = "factory_image1.jpg"


def _vision_agent(sop):
    """Build an inspection Agent with the shared llm, 3 loops, multi-modal."""
    return Agent(llm=llm, sop=sop, max_loops=3, multi_modal=True)


# Seven specialised inspectors, one per SOP.
health_security_agent = _vision_agent(Health_Security_Agent_Prompt)
quality_control_agent = _vision_agent(Quality_Control_Agent_Prompt)
productivity_agent = _vision_agent(Productivity_Agent_Prompt)
safety_agent = _vision_agent(Safety_Agent_Prompt)
security_agent = _vision_agent(Security_Agent_Prompt)
sustainability_agent = _vision_agent(Sustainability_Agent_Prompt)
efficiency_agent = _vision_agent(Efficiency_Agent_Prompt)

# Run each agent on its specialised task over the same factory image.
health_analysis = health_security_agent.run("Analyze the safety of this factory", factory_image)
quality_analysis = quality_control_agent.run("Examine product quality in the factory", factory_image)
productivity_analysis = productivity_agent.run("Evaluate factory productivity", factory_image)
safety_analysis = safety_agent.run("Inspect the factory's adherence to safety standards", factory_image)
security_analysis = security_agent.run("Assess the factory's security measures and systems", factory_image)
sustainability_analysis = sustainability_agent.run("Examine the factory's sustainability practices", factory_image)
efficiency_analysis = efficiency_agent.run("Analyze the efficiency of the factory's manufacturing process", factory_image)
|
@ -1,30 +0,0 @@
|
||||
from swarms.structs import Flow
from swarms.models import Idefics

# Multi Modality Auto Agent
# Idefics: open multi-modal model; max_length caps the generated tokens.
llm = Idefics(max_length=2000)

task = "User: What is in this image? https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG"

# Two-loop flow with the dashboard enabled. Other knobs exist on Flow
# (stopping_condition, loop_interval, retry_attempts, retry_interval,
# interactive, dynamic_temperature) and can be passed here if needed.
flow = Flow(
    llm=llm,
    max_loops=2,
    dashboard=True,
)

# Other Flow helpers (load_state, dynamic_temperature, add_response_filter,
# validate_response, analyze_feedback, print_history_and_memory, save_state)
# are available but unused in this example.
out = flow.run(task)
|
@ -0,0 +1,17 @@
|
||||
from swarms.structs import Agent
from swarms.models.gpt4_vision_api import GPT4VisionAPI

# Vision model used by the agent to look at the image.
model = GPT4VisionAPI()

question = "What is the color of the object?"
image_path = "images/swarms.jpeg"

# Autonomous agent with the dashboard enabled; "auto" loops until the
# agent decides it is done.
vision_agent = Agent(
    llm=model,
    max_loops="auto",
    dashboard=True,
)

vision_agent.run(task=question, img=image_path)
|
@ -0,0 +1,185 @@
|
||||
import datetime
|
||||
import os
|
||||
|
||||
import streamlit as st
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from swarms.models import OpenAIChat
|
||||
from swarms.models.gpt4_vision_api import GPT4VisionAPI
|
||||
from swarms.models.stable_diffusion import StableDiffusion
|
||||
from swarms.structs import Agent
|
||||
|
||||
# Load environment variables
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
# NOTE(review): other examples read STABILITY_API_KEY; this one reads
# STABLE_API_KEY — confirm the intended variable name.
stability_api_key = os.getenv("STABLE_API_KEY")

# Initialize the models
# NOTE(review): GPT4VisionAPI is constructed with api_key= here while other
# examples pass openai_api_key= — confirm the expected keyword.
vision_api = GPT4VisionAPI(api_key=openai_api_key)
sd_api = StableDiffusion(api_key=stability_api_key)
gpt_api = OpenAIChat(openai_api_key=openai_api_key)
|
||||
|
||||
|
||||
class Idea2Image(Agent):
|
||||
def __init__(self, llm, vision_api):
|
||||
super().__init__(llm=llm)
|
||||
self.vision_api = vision_api
|
||||
|
||||
def run(self, initial_prompt, num_iterations, run_folder):
|
||||
current_prompt = initial_prompt
|
||||
|
||||
for i in range(num_iterations):
|
||||
print(f"Iteration {i}: Image generation and analysis")
|
||||
|
||||
if i == 0:
|
||||
current_prompt = self.enrich_prompt(current_prompt)
|
||||
print(f"Enriched Prompt: {current_prompt}")
|
||||
|
||||
img = sd_api.generate_and_move_image(
|
||||
current_prompt, i, run_folder
|
||||
)
|
||||
if not img:
|
||||
print("Failed to generate image")
|
||||
break
|
||||
print(f"Generated image at: {img}")
|
||||
|
||||
analysis = (
|
||||
self.vision_api.run(img, current_prompt)
|
||||
if img
|
||||
else None
|
||||
)
|
||||
if analysis:
|
||||
current_prompt += (
|
||||
". " + analysis[:500]
|
||||
) # Ensure the analysis is concise
|
||||
print(f"Image Analysis: {analysis}")
|
||||
else:
|
||||
print(f"Failed to analyze image at: {img}")
|
||||
|
||||
def enrich_prompt(self, prompt):
|
||||
enrichment_task = (
|
||||
"Create a concise and effective image generation prompt"
|
||||
" within 400 characters or less, based on Stable"
|
||||
" Diffusion and Dalle best practices to help it create"
|
||||
" much better images. Starting prompt:"
|
||||
f" \n\n'{prompt}'\n\nImprove the prompt with any"
|
||||
" applicable details or keywords by considering the"
|
||||
" following aspects: \n1. Subject details (like actions,"
|
||||
" emotions, environment) \n2. Artistic style (such as"
|
||||
" surrealism, hyperrealism) \n3. Medium (digital"
|
||||
" painting, oil on canvas) \n4. Color themes and"
|
||||
" lighting (like warm colors, cinematic lighting) \n5."
|
||||
" Composition and framing (close-up, wide-angle) \n6."
|
||||
" Additional elements (like a specific type of"
|
||||
" background, weather conditions) \n7. Any other"
|
||||
" artistic or thematic details that can make the image"
|
||||
" more vivid and compelling. Help the image generator"
|
||||
" create better images by enriching the prompt."
|
||||
)
|
||||
llm_result = self.llm.generate([enrichment_task])
|
||||
return (
|
||||
llm_result.generations[0][0].text[:500]
|
||||
if llm_result.generations
|
||||
else None
|
||||
)
|
||||
|
||||
def run_gradio(self, initial_prompt, num_iterations, run_folder):
|
||||
results = []
|
||||
current_prompt = initial_prompt
|
||||
|
||||
for i in range(num_iterations):
|
||||
enriched_prompt = (
|
||||
self.enrich_prompt(current_prompt)
|
||||
if i == 0
|
||||
else current_prompt
|
||||
)
|
||||
img_path = sd_api.generate_and_move_image(
|
||||
enriched_prompt, i, run_folder
|
||||
)
|
||||
analysis = (
|
||||
self.vision_api.run(img_path, enriched_prompt)
|
||||
if img_path
|
||||
else None
|
||||
)
|
||||
|
||||
if analysis:
|
||||
current_prompt += (
|
||||
". " + analysis[:500]
|
||||
) # Ensuring the analysis is concise
|
||||
results.append((enriched_prompt, img_path, analysis))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# print(
|
||||
# colored("---------------------------------------- MultiModal Tree of Thought agents for Image Generation", "cyan", attrs=["bold"])
|
||||
# )
|
||||
# # User input and setup
|
||||
# user_prompt = input("Prompt for image generation: ")
|
||||
# num_iterations = int(
|
||||
# input("Enter the number of iterations for image improvement: ")
|
||||
# )
|
||||
# run_folder = os.path.join(
|
||||
# "runs", datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
# )
|
||||
# os.makedirs(run_folder, exist_ok=True)
|
||||
|
||||
# print(
|
||||
# colored(
|
||||
# f"---------------------------------- Running Multi-Modal Tree of thoughts agent with {num_iterations} iterations", "green"
|
||||
# )
|
||||
# )
|
||||
# # Initialize and run the agent
|
||||
# idea2image_agent = Idea2Image(gpt_api, vision_api)
|
||||
# idea2image_agent.run(user_prompt, num_iterations, run_folder)
|
||||
|
||||
# print("Idea space has been traversed.")
|
||||
|
||||
|
||||
# Load environment variables and initialize the models
|
||||
load_dotenv()
|
||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
stability_api_key = os.getenv("STABLE_API_KEY")
|
||||
vision_api = GPT4VisionAPI(api_key=openai_api_key)
|
||||
sd_api = StableDiffusion(api_key=stability_api_key)
|
||||
gpt_api = OpenAIChat(openai_api_key=openai_api_key)
|
||||
|
||||
# Define the modified Idea2Image class here
|
||||
|
||||
# Streamlit UI layout
|
||||
st.title(
|
||||
"Explore the infinite Multi-Modal Idea Space with Idea2Image"
|
||||
)
|
||||
user_prompt = st.text_input("Prompt for image generation:")
|
||||
num_iterations = st.number_input(
|
||||
"Enter the number of iterations for image improvement:",
|
||||
min_value=1,
|
||||
step=1,
|
||||
)
|
||||
|
||||
if st.button("Generate Image"):
|
||||
run_folder = os.path.join(
|
||||
"runs", datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
)
|
||||
os.makedirs(run_folder, exist_ok=True)
|
||||
idea2image_agent = Idea2Image(gpt_api, vision_api)
|
||||
|
||||
results = idea2image_agent.run_gradio(
|
||||
user_prompt, num_iterations, run_folder
|
||||
)
|
||||
|
||||
for i, (enriched_prompt, img_path, analysis) in enumerate(
|
||||
results
|
||||
):
|
||||
st.write(f"Iteration {i+1}:")
|
||||
st.write("Enriched Prompt:", enriched_prompt)
|
||||
if img_path:
|
||||
st.image(img_path, caption="Generated Image")
|
||||
else:
|
||||
st.error("Failed to generate image")
|
||||
if analysis:
|
||||
st.write("Image Analysis:", analysis)
|
||||
|
||||
st.success("Idea space has been traversed.")
|
||||
|
||||
# [Add any additional necessary code adjustments]
|
@ -0,0 +1,114 @@
|
||||
"""
|
||||
Multi Modal tree of thoughts that leverages the GPT-4 language model and the
|
||||
Stable Diffusion model to generate a multimodal output and evaluate the
|
||||
output based a metric from 0.0 to 1.0 and then run a search algorithm using DFS and BFS and return the best output.
|
||||
|
||||
|
||||
task: Generate an image of a swarm of bees -> Image generator -> GPT4V evaluates the img from 0.0 to 1.0 -> DFS/BFS -> return the best output
|
||||
|
||||
|
||||
- GPT4Vision will evaluate the image from 0.0 to 1.0 based on how likely it accomplishes the task
|
||||
- DFS/BFS will search for the best output based on the evaluation from GPT4Vision
|
||||
- The output will be a multimodal output that is a combination of the image and the text
|
||||
- The output will be evaluated by GPT4Vision
|
||||
- The prompt to the image generator will be optimized from the output of GPT4Vision and the search
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from swarms.models.gpt4_vision_api import GPT4VisionAPI
|
||||
from swarms.models.stable_diffusion import StableDiffusion
|
||||
from termcolor import colored
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Get the API key from the environment
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
stable_api_key = os.environ.get("STABLE_API_KEY")
|
||||
|
||||
|
||||
# Initialize the language model
|
||||
llm = GPT4VisionAPI(
|
||||
openai_api_key=api_key,
|
||||
max_tokens=500,
|
||||
)
|
||||
|
||||
# IMG Generator
|
||||
img_generator = StableDiffusion(api_key=stable_api_key)
|
||||
|
||||
|
||||
# Initialize the language model
|
||||
task = "Garden of Eden futuristic city graphic art"
|
||||
|
||||
|
||||
def evaluate_img(llm, task: str, img: str):
|
||||
EVAL_IMG = f"""
|
||||
Evaluate the image: {img} on a scale from 0.0 to 1.0 based on how likely it accomplishes the task: {task}. Output nothing than the float representing the evaluated img.
|
||||
"""
|
||||
out = llm.run(task=EVAL_IMG, img=img)
|
||||
out = float(out)
|
||||
return out
|
||||
|
||||
|
||||
def enrichment_prompt(starting_prompt: str, evaluated_img: str):
|
||||
enrichment_task = (
|
||||
"Create a concise and effective image generation prompt"
|
||||
" within 400 characters or less, based on Stable Diffusion"
|
||||
" and Dalle best practices. Starting prompt:"
|
||||
f" \n\n'{starting_prompt}'\n\nImprove the prompt with any"
|
||||
" applicable details or keywords by considering the"
|
||||
" following aspects: \n1. Subject details (like actions,"
|
||||
" emotions, environment) \n2. Artistic style (such as"
|
||||
" surrealism, hyperrealism) \n3. Medium (digital painting,"
|
||||
" oil on canvas) \n4. Color themes and lighting (like warm"
|
||||
" colors, cinematic lighting) \n5. Composition and framing"
|
||||
" (close-up, wide-angle) \n6. Additional elements (like a"
|
||||
" specific type of background, weather conditions) \n7. Any"
|
||||
" other artistic or thematic details that can make the image"
|
||||
" more vivid and compelling. 8. Based on the evaluation of"
|
||||
" the first generated prompt used by the first prompt:"
|
||||
f" {evaluated_img} Enrich the prompt to generate a more"
|
||||
" compelling image. Output only a new prompt to create a"
|
||||
" better image"
|
||||
)
|
||||
return enrichment_task
|
||||
|
||||
|
||||
# Main loop
|
||||
max_iterations = 10 # Define the maximum number of iterations
|
||||
best_score = 0
|
||||
best_image = None
|
||||
|
||||
for _ in range(max_iterations):
|
||||
# Generate an image and get its path
|
||||
print(colored(f"Generating img for Task: {task}", "purple"))
|
||||
|
||||
img_path = img_generator.run(
|
||||
task=task
|
||||
) # This should return the file path of the generated image
|
||||
img_path = img_path[0]
|
||||
print(colored(f"Generated Image Path: {img_path}", "green"))
|
||||
|
||||
# Evaluate the image by passing the file path
|
||||
score = evaluate_img(llm, task, img_path)
|
||||
print(
|
||||
colored(
|
||||
f"Evaluated Image Score: {score} for {img_path}", "cyan"
|
||||
)
|
||||
)
|
||||
|
||||
# Update the best score and image path if necessary
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_image_path = img_path
|
||||
|
||||
# Enrich the prompt based on the evaluation
|
||||
prompt = enrichment_prompt(task, score)
|
||||
print(colored(f"Enrichment Prompt: {prompt}", "yellow"))
|
||||
|
||||
|
||||
# Output the best result
|
||||
print("Best Image Path:", best_image_path)
|
||||
print("Best Score:", best_score)
|
After Width: | Height: | Size: 193 KiB |
@ -0,0 +1,137 @@
|
||||
import os
|
||||
import base64
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from swarms.models import Anthropic, OpenAIChat
|
||||
from swarms.structs import Agent
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
# Define prompts for various tasks
|
||||
MEAL_PLAN_PROMPT = (
|
||||
"Based on the following user preferences: dietary restrictions as"
|
||||
" vegetarian, preferred cuisines as Italian and Indian, a total"
|
||||
" caloric intake of around 2000 calories per day, and an"
|
||||
" exclusion of legumes, create a detailed weekly meal plan."
|
||||
" Include a variety of meals for breakfast, lunch, dinner, and"
|
||||
" optional snacks."
|
||||
)
|
||||
IMAGE_ANALYSIS_PROMPT = (
|
||||
"Identify the items in this fridge, including their quantities"
|
||||
" and condition."
|
||||
)
|
||||
|
||||
|
||||
# Function to encode image to base64
|
||||
def encode_image(image_path):
|
||||
with open(image_path, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode("utf-8")
|
||||
|
||||
|
||||
# Initialize Language Model (LLM)
|
||||
llm = OpenAIChat(
|
||||
openai_api_key=openai_api_key,
|
||||
max_tokens=3000,
|
||||
)
|
||||
|
||||
|
||||
# Function to handle vision tasks
|
||||
def create_vision_agent(image_path):
|
||||
base64_image = encode_image(image_path)
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {openai_api_key}",
|
||||
}
|
||||
payload = {
|
||||
"model": "gpt-4-vision-preview",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": IMAGE_ANALYSIS_PROMPT},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
"max_tokens": 300,
|
||||
}
|
||||
response = requests.post(
|
||||
"https://api.openai.com/v1/chat/completions",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
return response.json()
|
||||
|
||||
|
||||
# Function to generate an integrated shopping list considering meal plan and fridge contents
|
||||
def generate_integrated_shopping_list(
|
||||
meal_plan_output, image_analysis, user_preferences
|
||||
):
|
||||
# Prepare the prompt for the LLM
|
||||
fridge_contents = image_analysis["choices"][0]["message"][
|
||||
"content"
|
||||
]
|
||||
prompt = (
|
||||
f"Based on this meal plan: {meal_plan_output}, and the"
|
||||
f" following items in the fridge: {fridge_contents},"
|
||||
" considering dietary preferences as vegetarian with a"
|
||||
" preference for Italian and Indian cuisines, generate a"
|
||||
" comprehensive shopping list that includes only the items"
|
||||
" needed."
|
||||
)
|
||||
|
||||
# Send the prompt to the LLM and return the response
|
||||
response = llm(prompt)
|
||||
return response # assuming the response is a string
|
||||
|
||||
|
||||
# Define agent for meal planning
|
||||
meal_plan_agent = Agent(
|
||||
llm=llm,
|
||||
sop=MEAL_PLAN_PROMPT,
|
||||
max_loops=1,
|
||||
autosave=True,
|
||||
saved_state_path="meal_plan_agent.json",
|
||||
)
|
||||
|
||||
# User preferences for meal planning
|
||||
user_preferences = {
|
||||
"dietary_restrictions": "vegetarian",
|
||||
"preferred_cuisines": ["Italian", "Indian"],
|
||||
"caloric_intake": 2000,
|
||||
"other notes": "Doesn't eat legumes",
|
||||
}
|
||||
|
||||
# Generate Meal Plan
|
||||
meal_plan_output = meal_plan_agent.run(
|
||||
f"Generate a meal plan: {user_preferences}"
|
||||
)
|
||||
|
||||
# Vision Agent - Analyze an Image
|
||||
image_analysis_output = create_vision_agent("full_fridge.jpg")
|
||||
|
||||
# Generate Integrated Shopping List
|
||||
integrated_shopping_list = generate_integrated_shopping_list(
|
||||
meal_plan_output, image_analysis_output, user_preferences
|
||||
)
|
||||
|
||||
# Print and save the outputs
|
||||
print("Meal Plan:", meal_plan_output)
|
||||
print("Integrated Shopping List:", integrated_shopping_list)
|
||||
|
||||
with open("nutrition_output.txt", "w") as file:
|
||||
file.write("Meal Plan:\n" + meal_plan_output + "\n\n")
|
||||
file.write(
|
||||
"Integrated Shopping List:\n"
|
||||
+ integrated_shopping_list
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
print("Outputs have been saved to nutrition_output.txt")
|
After Width: | Height: | Size: 41 KiB |
@ -0,0 +1,157 @@
|
||||
"""
|
||||
Swarm of multi modal autonomous agents for manufacturing!
|
||||
---------------------------------------------------------
|
||||
Health Security agent: Agent that monitors the health of working conditions: input image of factory output: health safety index 0.0 - 1.0 being the highest
|
||||
Quality Control agent: Agent that monitors the quality of the product: input image of product output: quality index 0.0 - 1.0 being the highest
|
||||
Productivity agent: Agent that monitors the productivity of the factory: input image of factory output: productivity index 0.0 - 1.0 being the highest
|
||||
Safety agent: Agent that monitors the safety of the factory: input image of factory output: safety index 0.0 - 1.0 being the highest
|
||||
Security agent: Agent that monitors the security of the factory: input image of factory output: security index 0.0 - 1.0 being the highest
|
||||
Sustainability agent: Agent that monitors the sustainability of the factory: input image of factory output: sustainability index 0.0 - 1.0 being the highest
|
||||
Efficiency agent: Agent that monitors the efficiency of the factory: input image of factory output: efficiency index 0.0 - 1.0 being the highest
|
||||
|
||||
|
||||
Agent:
|
||||
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
|
||||
"""
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from termcolor import colored
|
||||
|
||||
from swarms.models import GPT4VisionAPI
|
||||
from swarms.structs import Agent
|
||||
|
||||
load_dotenv()
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
# GPT4VisionAPI
|
||||
llm = GPT4VisionAPI(openai_api_key=api_key, max_tokens=2000)
|
||||
|
||||
assembly_line = (
|
||||
"playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg"
|
||||
)
|
||||
red_robots = (
|
||||
"playground/demos/swarm_of_mma_manufacturing/red_robots.jpg"
|
||||
)
|
||||
robots = "playground/demos/swarm_of_mma_manufacturing/robots.jpg"
|
||||
tesla_assembly_line = (
|
||||
"playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg"
|
||||
)
|
||||
|
||||
|
||||
# Define detailed prompts for each agent
|
||||
tasks = {
|
||||
"health_safety": (
|
||||
"Analyze the factory's working environment for health safety."
|
||||
" Focus on cleanliness, ventilation, spacing between"
|
||||
" workstations, and personal protective equipment"
|
||||
" availability."
|
||||
),
|
||||
"productivity": (
|
||||
"Review the factory's workflow efficiency, machine"
|
||||
" utilization, and employee engagement. Identify operational"
|
||||
" delays or bottlenecks."
|
||||
),
|
||||
"safety": (
|
||||
"Analyze the factory's safety measures, including fire exits,"
|
||||
" safety signage, and emergency response equipment."
|
||||
),
|
||||
"security": (
|
||||
"Evaluate the factory's security systems, entry/exit"
|
||||
" controls, and potential vulnerabilities."
|
||||
),
|
||||
"sustainability": (
|
||||
"Inspect the factory's sustainability practices, including"
|
||||
" waste management, energy usage, and eco-friendly processes."
|
||||
),
|
||||
"efficiency": (
|
||||
"Assess the manufacturing process's efficiency, considering"
|
||||
" the layout, logistics, and automation level."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
# Define prompts for each agent
|
||||
health_safety_prompt = tasks["health_safety"]
|
||||
productivity_prompt = tasks["productivity"]
|
||||
safety_prompt = tasks["safety"]
|
||||
security_prompt = tasks["security"]
|
||||
sustainability_prompt = tasks["sustainability"]
|
||||
efficiency_prompt = tasks["efficiency"]
|
||||
|
||||
|
||||
# Health security agent
|
||||
health_security_agent = Agent(
|
||||
llm=llm,
|
||||
sop_list=health_safety_prompt,
|
||||
max_loops=1,
|
||||
multi_modal=True,
|
||||
)
|
||||
|
||||
# Quality control agent
|
||||
productivity_check_agent = Agent(
|
||||
llm=llm,
|
||||
sop=productivity_prompt,
|
||||
max_loops=1,
|
||||
multi_modal=True,
|
||||
autosave=True,
|
||||
)
|
||||
|
||||
# Security agent
|
||||
security_check_agent = Agent(
|
||||
llm=llm,
|
||||
sop=security_prompt,
|
||||
max_loops=1,
|
||||
multi_modal=True,
|
||||
autosave=True,
|
||||
)
|
||||
|
||||
# Efficiency agent
|
||||
efficiency_check_agent = Agent(
|
||||
llm=llm,
|
||||
sop=efficiency_prompt,
|
||||
max_loops=1,
|
||||
multi_modal=True,
|
||||
autosave=True,
|
||||
)
|
||||
|
||||
print(colored("Running the agents...", "green"))
|
||||
|
||||
|
||||
print(colored("Running health check agent initializing...", "cyan"))
|
||||
# Add the first task to the health_security_agent
|
||||
health_check = health_security_agent.run(
|
||||
"Analyze the safety of this factory", robots
|
||||
)
|
||||
|
||||
|
||||
print(
|
||||
colored(
|
||||
"--------------- Productivity agents initializing...", "green"
|
||||
)
|
||||
)
|
||||
# Add the third task to the productivity_check_agent
|
||||
productivity_check = productivity_check_agent.run(
|
||||
health_check, assembly_line
|
||||
)
|
||||
|
||||
print(
|
||||
colored(
|
||||
"--------------- Security agents initializing...", "green"
|
||||
)
|
||||
)
|
||||
# Add the fourth task to the security_check_agent
|
||||
security_check = security_check_agent.run(
|
||||
productivity_check, red_robots
|
||||
)
|
||||
|
||||
|
||||
print(
|
||||
colored(
|
||||
"--------------- Efficiency agents initializing...", "cyan"
|
||||
)
|
||||
)
|
||||
# Add the fifth task to the efficiency_check_agent
|
||||
efficiency_check = efficiency_check_agent.run(
|
||||
security_check, tesla_assembly_line
|
||||
)
|
After Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 39 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 451 KiB |
@ -0,0 +1,84 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from swarms.models import OpenAIChat, GPT4VisionAPI
|
||||
from swarms.structs import Agent, SequentialWorkflow
|
||||
import swarms.prompts.urban_planning as upp
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
stability_api_key = os.getenv("STABILITY_API_KEY")
|
||||
|
||||
# Initialize language model
|
||||
llm = OpenAIChat(
|
||||
openai_api_key=api_key, temperature=0.5, max_tokens=3000
|
||||
)
|
||||
|
||||
# Initialize Vision model
|
||||
vision_api = GPT4VisionAPI(api_key=api_key)
|
||||
|
||||
# Initialize agents for urban planning tasks
|
||||
architecture_analysis_agent = Agent(
|
||||
llm=llm, max_loops=1, sop=upp.ARCHITECTURE_ANALYSIS_PROMPT
|
||||
)
|
||||
infrastructure_evaluation_agent = Agent(
|
||||
llm=llm, max_loops=1, sop=upp.INFRASTRUCTURE_EVALUATION_PROMPT
|
||||
)
|
||||
traffic_flow_analysis_agent = Agent(
|
||||
llm=llm, max_loops=1, sop=upp.TRAFFIC_FLOW_ANALYSIS_PROMPT
|
||||
)
|
||||
environmental_impact_assessment_agent = Agent(
|
||||
llm=llm,
|
||||
max_loops=1,
|
||||
sop=upp.ENVIRONMENTAL_IMPACT_ASSESSMENT_PROMPT,
|
||||
)
|
||||
public_space_utilization_agent = Agent(
|
||||
llm=llm, max_loops=1, sop=upp.PUBLIC_SPACE_UTILIZATION_PROMPT
|
||||
)
|
||||
socioeconomic_impact_analysis_agent = Agent(
|
||||
llm=llm, max_loops=1, sop=upp.SOCIOECONOMIC_IMPACT_ANALYSIS_PROMPT
|
||||
)
|
||||
|
||||
# Initialize the final planning agent
|
||||
final_plan_agent = Agent(
|
||||
llm=llm, max_loops=1, sop=upp.FINAL_URBAN_IMPROVEMENT_PLAN_PROMPT
|
||||
)
|
||||
|
||||
# Create Sequential Workflow
|
||||
workflow = SequentialWorkflow(max_loops=1)
|
||||
|
||||
# Add tasks to workflow with personalized prompts
|
||||
workflow.add(architecture_analysis_agent, "Architecture Analysis")
|
||||
workflow.add(
|
||||
infrastructure_evaluation_agent, "Infrastructure Evaluation"
|
||||
)
|
||||
workflow.add(traffic_flow_analysis_agent, "Traffic Flow Analysis")
|
||||
workflow.add(
|
||||
environmental_impact_assessment_agent,
|
||||
"Environmental Impact Assessment",
|
||||
)
|
||||
workflow.add(
|
||||
public_space_utilization_agent, "Public Space Utilization"
|
||||
)
|
||||
workflow.add(
|
||||
socioeconomic_impact_analysis_agent,
|
||||
"Socioeconomic Impact Analysis",
|
||||
)
|
||||
workflow.add(
|
||||
final_plan_agent,
|
||||
(
|
||||
"Generate the final urban improvement plan based on all"
|
||||
" previous agent's findings"
|
||||
),
|
||||
)
|
||||
# Run the workflow for individual analysis tasks
|
||||
|
||||
# Execute the workflow for the final planning
|
||||
workflow.run()
|
||||
|
||||
# Output results for each task and the final plan
|
||||
for task in workflow.tasks:
|
||||
print(
|
||||
f"Task Description: {task.description}\nResult:"
|
||||
f" {task.result}\n"
|
||||
)
|
@ -0,0 +1,73 @@
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from swarms.models import GPT4VisionAPI, OpenAIChat
|
||||
from swarms.prompts.xray_swarm_prompt import (
|
||||
TREATMENT_PLAN_PROMPT,
|
||||
XRAY_ANALYSIS_PROMPT,
|
||||
)
|
||||
from swarms.structs.agent import Agent
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
# Function to analyze an X-ray image
|
||||
multimodal_llm = GPT4VisionAPI(
|
||||
openai_api_key=openai_api_key,
|
||||
)
|
||||
|
||||
# Initialize Language Model (LLM)
|
||||
llm = OpenAIChat(
|
||||
openai_api_key=openai_api_key,
|
||||
max_tokens=3000,
|
||||
)
|
||||
|
||||
|
||||
# Function to analyze an X-ray image
|
||||
analyze_xray_agent = Agent(
|
||||
llm=multimodal_llm,
|
||||
autosave=True,
|
||||
sop=XRAY_ANALYSIS_PROMPT,
|
||||
multi_modal=True,
|
||||
)
|
||||
|
||||
|
||||
# Treatment Plan Agent
|
||||
treatment_agent = Agent(
|
||||
llm=multimodal_llm,
|
||||
autosave=True,
|
||||
sop=TREATMENT_PLAN_PROMPT,
|
||||
max_loops=4,
|
||||
)
|
||||
|
||||
|
||||
# Function to generate a treatment plan
|
||||
def generate_treatment_plan(diagnosis):
|
||||
treatment_plan_prompt = TREATMENT_PLAN_PROMPT.format(diagnosis)
|
||||
# Using the llm object with the 'prompt' argument
|
||||
return treatment_agent.run(treatment_plan_prompt)
|
||||
|
||||
|
||||
# X-ray Agent - Analyze an X-ray image
|
||||
xray_image_path = "playground/demos/xray/xray2.jpg"
|
||||
|
||||
|
||||
# Diagnosis
|
||||
diagnosis = analyze_xray_agent.run(
|
||||
task="Analyze the following XRAY", img=xray_image_path
|
||||
)
|
||||
|
||||
# Generate Treatment Plan
|
||||
treatment_plan_output = generate_treatment_plan(diagnosis)
|
||||
|
||||
# Print and save the outputs
|
||||
print("X-ray Analysis:", diagnosis)
|
||||
print("Treatment Plan:", treatment_plan_output)
|
||||
|
||||
with open("medical_analysis_output.txt", "w") as file:
|
||||
file.write("X-ray Analysis:\n" + diagnosis + "\n\n")
|
||||
file.write("Treatment Plan:\n" + treatment_plan_output + "\n")
|
||||
|
||||
print("Outputs have been saved to medical_analysis_output.txt")
|
After Width: | Height: | Size: 994 KiB |
@ -0,0 +1,25 @@
|
||||
from langchain.document_loaders import CSVLoader
|
||||
from swarms.memory import qdrant
|
||||
|
||||
loader = CSVLoader(
|
||||
file_path="../document_parsing/aipg/aipg.csv",
|
||||
encoding="utf-8-sig",
|
||||
)
|
||||
docs = loader.load()
|
||||
|
||||
|
||||
# Initialize the Qdrant instance
|
||||
# See qdrant documentation on how to run locally
|
||||
qdrant_client = qdrant.Qdrant(
|
||||
host="https://697ea26c-2881-4e17-8af4-817fcb5862e8.europe-west3-0.gcp.cloud.qdrant.io",
|
||||
collection_name="qdrant",
|
||||
api_key="BhG2_yINqNU-aKovSEBadn69Zszhbo5uaqdJ6G_qDkdySjAljvuPqQ",
|
||||
)
|
||||
qdrant_client.add_vectors(docs)
|
||||
|
||||
# Perform a search
|
||||
search_query = "Who is jojo"
|
||||
search_results = qdrant_client.search_vectors(search_query)
|
||||
print("Search Results:")
|
||||
for result in search_results:
|
||||
print(result)
|
@ -1,7 +1,9 @@
|
||||
from swarms.models.mpt import MPT
|
||||
|
||||
mpt_instance = MPT(
|
||||
"mosaicml/mpt-7b-storywriter", "EleutherAI/gpt-neox-20b", max_tokens=150
|
||||
"mosaicml/mpt-7b-storywriter",
|
||||
"EleutherAI/gpt-neox-20b",
|
||||
max_tokens=150,
|
||||
)
|
||||
|
||||
mpt_instance.generate("Once upon a time in a land far, far away...")
|
||||
|
@ -0,0 +1,77 @@
|
||||
"""
|
||||
|
||||
|
||||
tool decorated func [search_api] -> agent which parses the docs of the tool func
|
||||
-> injected into prompt -> agent will output json containing tool usage -> agent output will be parsed -> tool executed
|
||||
-> terminal response can be returned to agent for self-healing
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Import the OpenAIChat model and the Agent struct
|
||||
from swarms.models import OpenAIChat
|
||||
from swarms.structs import Agent
|
||||
from swarms.tools.tool import tool
|
||||
|
||||
# Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# Define a tool
|
||||
@tool
|
||||
def search_api(query: str, description: str):
|
||||
"""Search the web for the query
|
||||
|
||||
Args:
|
||||
query (str): _description_
|
||||
|
||||
Returns:
|
||||
_type_: _description_
|
||||
"""
|
||||
return f"Search results for {query}"
|
||||
|
||||
|
||||
@tool
|
||||
def weather_api(
|
||||
query: str,
|
||||
):
|
||||
"""_summary_
|
||||
|
||||
Args:
|
||||
query (str): _description_
|
||||
"""
|
||||
print(f"Getting the weather for {query}")
|
||||
|
||||
|
||||
@tool
|
||||
def rapid_api(query: str):
|
||||
"""_summary_
|
||||
|
||||
Args:
|
||||
query (str): _description_
|
||||
"""
|
||||
print(f"Getting the weather for {query}")
|
||||
|
||||
|
||||
# Get the API key from the environment
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
# Initialize the language model
|
||||
llm = OpenAIChat(
|
||||
temperature=0.5,
|
||||
openai_api_key=api_key,
|
||||
)
|
||||
|
||||
|
||||
## Initialize the workflow
|
||||
agent = Agent(
|
||||
llm=llm, max_loops=1, dashboard=True, tools=[search_api]
|
||||
)
|
||||
|
||||
# Run the workflow on a task
|
||||
out = agent.run("Generate a 10,000 word blog on health and wellness.")
|
||||
print(out)
|