diff --git a/.env.example b/.env.example
index c6c3cade..6197a6d4 100644
--- a/.env.example
+++ b/.env.example
@@ -5,7 +5,7 @@ AI21_API_KEY="your_api_key_here"
COHERE_API_KEY="your_api_key_here"
ALEPHALPHA_API_KEY="your_api_key_here"
HUGGINFACEHUB_API_KEY="your_api_key_here"
-
+STABILITY_API_KEY="your_api_key_here"
WOLFRAM_ALPHA_APPID="your_wolfram_alpha_appid_here"
ZAPIER_NLA_API_KEY="your_zapier_nla_api_key_here"
@@ -41,4 +41,4 @@ REDIS_PORT=
PINECONE_API_KEY=""
BING_COOKIE=""
-PSG_CONNECTION_STRING=""
\ No newline at end of file
+PSG_CONNECTION_STRING=""
diff --git a/.flake8 b/.flake8
index d148ed3e..eedac54c 100644
--- a/.flake8
+++ b/.flake8
@@ -1,2 +1,2 @@
[flake8]
-extend-ignore = E501, W292, W291
\ No newline at end of file
+extend-ignore = E501, W292, W291, W293
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
index c626b001..e3733b98 100644
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -1,13 +1,14 @@
+---
# These are supported funding model platforms
github: [kyegomez]
-patreon: # Replace with a single Patreon username
-open_collective: # Replace with a single Open Collective username
-ko_fi: # Replace with a single Ko-fi username
-tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-otechie: # Replace with a single Otechie username
-lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
-custom: #Nothing
+# patreon: # Replace with a single Patreon username
+# open_collective: # Replace with a single Open Collective username
+# ko_fi: # Replace with a single Ko-fi username
+# tidelift: # Replace with a single Tidelift platform-name/package-name
+# community_bridge: # Replace with a single Community Bridge project-name
+# liberapay: # Replace with a single Liberapay username
+# issuehunt: # Replace with a single IssueHunt username
+# otechie: # Replace with a single Otechie username
+# lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name
+# custom: #Nothing
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 34b75fb9..b523994a 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,3 +1,4 @@
+---
# https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates
version: 2
@@ -11,4 +12,3 @@ updates:
directory: "/"
schedule:
interval: "weekly"
-
diff --git a/.github/labeler.yml b/.github/labeler.yml
index 72ccc40a..1fb9d7ec 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,12 +1,14 @@
+---
# this is a config file for the github action labeler
# Add 'label1' to any changes within 'example' folder or any subfolders
example_change:
-- example/**
+ - example/**
# Add 'label2' to any file changes within 'example2' folder
example2_change: example2/*
-# Add label3 to any change to .txt files within the entire repository. Quotation marks are required for the leading asterisk
+# Add label3 to any change to .txt files within the entire repository.
+# Quotation marks are required for the leading asterisk
text_files:
-- '**/*.txt'
\ No newline at end of file
+ - '**/*.txt'
diff --git a/.github/workflows/RELEASE.yml b/.github/workflows/RELEASE.yml
index 0abb2ed7..cde08689 100644
--- a/.github/workflows/RELEASE.yml
+++ b/.github/workflows/RELEASE.yml
@@ -22,10 +22,10 @@ jobs:
- uses: actions/checkout@v4
- name: Install poetry
run: pipx install poetry==$POETRY_VERSION
- - name: Set up Python 3.10
+ - name: Set up Python 3.9
uses: actions/setup-python@v4
with:
- python-version: "3.10"
+ python-version: "3.9"
cache: "poetry"
- name: Build project for distribution
run: poetry build
diff --git a/.github/workflows/code_quality_control.yml b/.github/workflows/code_quality_control.yml
deleted file mode 100644
index a0a49572..00000000
--- a/.github/workflows/code_quality_control.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: Linting and Formatting
-
-on:
- push:
- branches:
- - master
-
-jobs:
- lint_and_format:
- runs-on: ubuntu-latest
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: 3.x
-
- - name: Install dependencies
- run: pip install -r requirements.txt
-
- - name: Find Python files
- run: find swarms -name "*.py" -type f -exec autopep8 --in-place --aggressive --aggressive {} +
-
- - name: Push changes
- uses: ad-m/github-push-action@master
- with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/cos_integration.yml b/.github/workflows/cos_integration.yml
deleted file mode 100644
index 3bfb587c..00000000
--- a/.github/workflows/cos_integration.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-name: Continuous Integration
-
-on:
- push:
- branches:
- - master
-
-jobs:
- test:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: 3.x
-
- - name: Install dependencies
- run: pip install -r requirements.txt
-
- - name: Run unit tests
- run: pytest tests/unit
-
- - name: Run integration tests
- run: pytest tests/integration
-
- - name: Run code coverage
- run: pytest --cov=swarms tests/
-
- - name: Run linters
- run: pylint swarms
-
- - name: Build documentation
- run: make docs
-
- - name: Validate documentation
- run: sphinx-build -b linkcheck docs build/docs
-
- - name: Run performance tests
- run: find ./tests -name '*.py' -exec pytest {} \;
\ No newline at end of file
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 793d8e0e..6ed5bcba 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -1,18 +1,19 @@
+---
name: Docker Image CI
-on:
+on: # yamllint disable-line rule:truthy
push:
- branches: [ "master" ]
+ branches: ["master"]
pull_request:
- branches: [ "master" ]
+ branches: ["master"]
jobs:
build:
runs-on: ubuntu-latest
-
+ name: Build Docker image
steps:
- - uses: actions/checkout@v4
- - name: Build the Docker image
- run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
+ - uses: actions/checkout@v4
+ - name: Build the Docker image
+ run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
diff --git a/.github/workflows/docs_test.yml b/.github/workflows/docs_test.yml
deleted file mode 100644
index 35d2ca91..00000000
--- a/.github/workflows/docs_test.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: Documentation Tests
-
-on:
- push:
- branches:
- - master
-
-jobs:
- test:
- runs-on: ubuntu-latest
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: 3.x
-
- - name: Install dependencies
- run: pip install -r requirements.txt
-
- - name: Build documentation
- run: make docs
-
- - name: Validate documentation
- run: sphinx-build -b linkcheck docs build/docs
\ No newline at end of file
diff --git a/.github/workflows/generator-generic-ossf-slsa3-publish.yml b/.github/workflows/generator-generic-ossf-slsa3-publish.yml
deleted file mode 100644
index a36e782c..00000000
--- a/.github/workflows/generator-generic-ossf-slsa3-publish.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-# This workflow lets you generate SLSA provenance file for your project.
-# The generation satisfies level 3 for the provenance requirements - see https://slsa.dev/spec/v0.1/requirements
-# The project is an initiative of the OpenSSF (openssf.org) and is developed at
-# https://github.com/slsa-framework/slsa-github-generator.
-# The provenance file can be verified using https://github.com/slsa-framework/slsa-verifier.
-# For more information about SLSA and how it improves the supply-chain, visit slsa.dev.
-
-name: SLSA generic generator
-on:
- workflow_dispatch:
- release:
- types: [created]
-
-jobs:
- build:
- runs-on: ubuntu-latest
- outputs:
- digests: ${{ steps.hash.outputs.digests }}
-
- steps:
- - uses: actions/checkout@v3
-
- # ========================================================
- #
- # Step 1: Build your artifacts.
- #
- # ========================================================
- - name: Build artifacts
- run: |
- # These are some amazing artifacts.
- echo "artifact1" > artifact1
- echo "artifact2" > artifact2
-
- # ========================================================
- #
- # Step 2: Add a step to generate the provenance subjects
- # as shown below. Update the sha256 sum arguments
- # to include all binaries that you generate
- # provenance for.
- #
- # ========================================================
- - name: Generate subject for provenance
- id: hash
- run: |
- set -euo pipefail
-
- # List the artifacts the provenance will refer to.
- files=$(ls artifact*)
- # Generate the subjects (base64 encoded).
- echo "hashes=$(sha256sum $files | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
- provenance:
- needs: [build]
- permissions:
- actions: read # To read the workflow path.
- id-token: write # To sign the provenance.
- contents: write # To add assets to a release.
- uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v1.4.0
- with:
- base64-subjects: "${{ needs.build.outputs.digests }}"
- upload-assets: true # Optional: Upload to a new release
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 97aa4732..8a6f374c 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -1,19 +1,29 @@
+---
# This is a basic workflow to help you get started with Actions
name: Lint
-on: [push, pull_request]
+on: [push, pull_request] # yamllint disable-line rule:truthy
jobs:
+ yaml-lint:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out source repository
+ uses: actions/checkout@v4
+ - name: yaml Lint
+ uses: ibiqlik/action-yamllint@v3
flake8-lint:
runs-on: ubuntu-latest
- name: Lint
+ name: flake8 Lint
steps:
- name: Check out source repository
uses: actions/checkout@v4
-      - name: Set up Python environment
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
-      - name: flake8 Lint
-        uses: py-actions/flake8@v2
\ No newline at end of file
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
+ ruff-lint:
+ runs-on: ubuntu-latest
+ name: ruff Lint
+ steps:
+ - uses: actions/checkout@v4
+ - uses: chartboost/ruff-action@v1
diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml
deleted file mode 100644
index 216903d5..00000000
--- a/.github/workflows/lints.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Linting
-
-on:
- push:
- branches:
- - master
-
-jobs:
- lint:
- runs-on: ubuntu-latest
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: 3.x
-
- - name: Install dependencies
- run: pip install -r requirements.txt
-
- - name: Run linters
- run: pylint swarms
\ No newline at end of file
diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml
deleted file mode 100644
index ab01451f..00000000
--- a/.github/workflows/makefile.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Makefile CI
-
-on:
- push:
- branches: [ "master" ]
- pull_request:
- branches: [ "master" ]
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v3
-
- - name: configure
- run: ./configure
-
- - name: Install dependencies
- run: make
-
- - name: Run check
- run: make check
-
- - name: Run distcheck
- run: make distcheck
diff --git a/.github/workflows/pr_request_checks.yml b/.github/workflows/pr_request_checks.yml
index 56b8c1f7..2b91f9a3 100644
--- a/.github/workflows/pr_request_checks.yml
+++ b/.github/workflows/pr_request_checks.yml
@@ -1,3 +1,4 @@
+---
name: Pull Request Checks
on:
@@ -19,9 +20,11 @@ jobs:
python-version: 3.x
- name: Install dependencies
- run: pip install -r requirements.txt
+ run: |
+ pip install -r requirements.txt
+ pip install pytest
- name: Run tests and checks
run: |
- find tests/ -name "*.py" | xargs pytest
- pylint swarms
\ No newline at end of file
+ pytest
+ pylint swarms
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
deleted file mode 100644
index 3f3ba2e2..00000000
--- a/.github/workflows/pylint.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Pylint
-
-on: [push]
-
-jobs:
- build:
- runs-on: ubuntu-latest
- strategy:
- matrix:
- python-version: ["3.8", "3.9", "3.10"]
- steps:
- - uses: actions/checkout@v4
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install pylint
- - name: Analysing the code with pylint
- run: |
- pylint $(git ls-files '*.py')
diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml
deleted file mode 100644
index 5ff88856..00000000
--- a/.github/workflows/pyre.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-# This workflow integrates Pyre with GitHub's
-# Code Scanning feature.
-#
-# Pyre is a performant type checker for Python compliant with
-# PEP 484. Pyre can analyze codebases with millions of lines
-# of code incrementally — providing instantaneous feedback
-# to developers as they write code.
-#
-# See https://pyre-check.org
-
-name: Pyre
-
-on:
- workflow_dispatch:
- push:
- branches: [ "master" ]
- pull_request:
- branches: [ "master" ]
-
-permissions:
- contents: read
-
-jobs:
- pyre:
- permissions:
- actions: read
- contents: read
- security-events: write
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- with:
- submodules: true
-
- - name: Run Pyre
- uses: facebook/pyre-action@60697a7858f7cc8470d8cc494a3cf2ad6b06560d
- with:
- # To customize these inputs:
- # See https://github.com/facebook/pyre-action#inputs
- repo-directory: './'
- requirements-path: 'requirements.txt'
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
deleted file mode 100644
index 1f634309..00000000
--- a/.github/workflows/python-app.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-# This workflow will install Python dependencies, run tests and lint with a single version of Python
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
-name: Python application
-
-on:
- push:
- branches: [ "master" ]
- pull_request:
- branches: [ "master" ]
-
-permissions:
- contents: read
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v4
- - name: Set up Python 3.10
- uses: actions/setup-python@v4
- with:
- python-version: "3.10"
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install flake8 pytest
- if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- - name: Lint with flake8
- run: |
- # stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
- # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- - name: Test with pytest
- run: |
- pytest
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index b8e0a17a..fab4f817 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -19,9 +19,9 @@ jobs:
python-version: ["3.7", "3.9", "3.10", "3.11"]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v3
+ uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
@@ -38,4 +38,4 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
- find ./tests -name '*.py' -exec pytest {} \;
+ pytest
\ No newline at end of file
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index c8f4ba0c..9fc16341 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,7 +1,7 @@
-
+---
name: Upload Python Package
-on:
+on: # yamllint disable-line rule:truthy
release:
types: [published]
@@ -14,19 +14,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.x'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install build
- - name: Build package
- run: python -m build
- - name: Publish package
- uses: pypa/gh-action-pypi-publish@b7f401de30cb6434a1e19f805ff006643653240e
- with:
- user: __token__
- password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
+ - uses: actions/checkout@v4
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.x'
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build
+ - name: Build package
+ run: python -m build
+ - name: Publish package
+ uses: pypa/gh-action-pypi-publish@b7f401de30cb6434a1e19f805ff006643653240e
+ with:
+ user: __token__
+ password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
deleted file mode 100644
index 9b09cfa9..00000000
--- a/.github/workflows/quality.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Quality
-
-on:
- push:
- branches: [ "master" ]
- pull_request:
- branches: [ "master" ]
-
-jobs:
- lint:
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- steps:
- - name: Checkout actions
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
- - name: Init environment
- uses: ./.github/actions/init-environment
- - name: Run linter
- run: |
- pylint `git diff --name-only --diff-filter=d origin/master HEAD | grep -E '\.py$' | tr '\n' ' '`
\ No newline at end of file
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
deleted file mode 100644
index ef06d34a..00000000
--- a/.github/workflows/ruff.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-name: Ruff
-on: [ push, pull_request ]
-jobs:
- ruff:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: chartboost/ruff-action@v1
diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml
deleted file mode 100644
index 172dc64b..00000000
--- a/.github/workflows/run_test.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Python application test
-
-on: [push]
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v4
- - name: Set up Python 3.8
- uses: actions/setup-python@v4
- with:
- python-version: 3.8
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install pytest
- if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- - name: Run tests with pytest
- run: |
- find tests/ -name "*.py" | xargs pytest
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 65dc68d9..d9dafc76 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,3 +1,4 @@
+---
name: test
on:
@@ -9,7 +10,49 @@ on:
 env:
   POETRY_VERSION: "1.4.2"
 
 jobs:
+  # Runs the core and extended test suites on every supported Python version.
+  test:
+    name: Python ${{ matrix.python-version }} ${{ matrix.test_type }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so YAML does not read "3.10" as the float 3.1.
+        python-version:
+          - "3.8"
+          - "3.9"
+          - "3.10"
+          - "3.11"
+        test_type:
+          - "core"
+          - "extended"
+    steps:
+      - uses: actions/checkout@v4
+      # NOTE(review): the inputs below (python-version, poetry-version,
+      # cache-key, install-command) are passed to the repo-local composite
+      # action; confirm .github/actions/poetry_setup exists and declares them.
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: "./.github/actions/poetry_setup"
+        with:
+          python-version: ${{ matrix.python-version }}
+          poetry-version: "1.4.2"
+          cache-key: ${{ matrix.test_type }}
+          install-command: |
+            if [ "${{ matrix.test_type }}" == "core" ]; then
+              echo "Running core tests, installing dependencies with poetry..."
+              poetry install
+            else
+              echo "Running extended tests, installing dependencies with poetry..."
+              poetry install -E extended_testing
+            fi
+      - name: Run ${{matrix.test_type}} tests
+        run: |
+          if [ "${{ matrix.test_type }}" == "core" ]; then
+            make test
+          else
+            make extended_tests
+          fi
+        shell: bash
   build:
     runs-on: ubuntu-latest
     strategy:
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index c2c805f5..ae572d22 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -19,7 +19,9 @@ jobs:
python-version: 3.x
- name: Install dependencies
- run: pip install -r requirements.txt
+ run: |
+ pip install -r requirements.txt
+ pip install pytest
- name: Run unit tests
- run: find tests/ -name "*.py" | xargs pytest
\ No newline at end of file
+ run: pytest
\ No newline at end of file
diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml
index 42ac2271..0a1985a7 100644
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -18,16 +18,18 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
- python-version: '3.10'
+ python-version: '3.9'
- name: Install dependencies
- run: pip install -r requirements.txt
+ run: |
+ pip install -r requirements.txt
+ pip install pytest
- name: Run Python unit tests
- run: python3 -m unittest tests/
+ run: pytest
- name: Verify that the Docker image for the action builds
run: docker build . --file Dockerfile
- name: Verify integration test results
- run: find tests/ -name "*.py" | xargs pytest
+ run: pytest
diff --git a/.gitignore b/.gitignore
index 8f8a98a8..716dc148 100644
--- a/.gitignore
+++ b/.gitignore
@@ -186,4 +186,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
\ No newline at end of file
+#.idea/
+.vscode/settings.json
diff --git a/.readthedocs.yml b/.readthedocs.yml
index fbdc74ec..e3e74fad 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -10,4 +10,4 @@ mkdocs:
python:
install:
- - requirements: requirements.txt
\ No newline at end of file
+ - requirements: requirements.txt
diff --git a/.yamllint b/.yamllint
new file mode 100644
index 00000000..c58fcd8f
--- /dev/null
+++ b/.yamllint
@@ -0,0 +1,4 @@
+rules:
+ line-length:
+ level: warning
+ allow-non-breakable-inline-mappings: true
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8230322d..21f4b51c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -72,7 +72,6 @@ pre-commit --version
Now when you make a git commit, the black code formatter and ruff linter will run.
-
Furthermore, we have integrated a pre-commit GitHub Action into our workflow. This means that with every pull request opened, the pre-commit checks will be automatically enforced, streamlining the code review process and ensuring that all contributions adhere to our quality standards.
To run the pre-commit tool, follow these steps:
@@ -124,13 +123,13 @@ You can learn more about mkdocs on the [mkdocs website](https://www.mkdocs.org/)
## 🧪 tests
- Run all the tests in the tests folder
-`find ./tests -name '*.py' -exec pytest {} \;`
-
+ ```pytest```
+
## Code Quality
-`quality.sh` runs 4 different code formatters for ultra reliable code cleanup using Autopep8, Black, Ruff, YAPF
+`code-quality.sh` runs 4 different code formatters for ultra reliable code cleanup using Autopep8, Black, Ruff, YAPF
1. Open your terminal.
-2. Change directory to where `quality.sh` is located using `cd` command:
+2. Change directory to where `code-quality.sh` is located using `cd` command:
```sh
cd /path/to/directory
```
@@ -142,17 +141,17 @@ You can learn more about mkdocs on the [mkdocs website](https://www.mkdocs.org/)
4. Run the script:
```sh
- ./quality.sh
+ ./code-quality.sh
```
If the script requires administrative privileges, you might need to run it with `sudo`:
```sh
-sudo ./quality.sh
+sudo ./code-quality.sh
```
-Please replace `/path/to/directory` with the actual path where the `quality.sh` script is located on your system.
+Please replace `/path/to/directory` with the actual path where the `code-quality.sh` script is located on your system.
-If you're asking for a specific content or functionality inside `quality.sh` related to YAPF or other code quality tools, you would need to edit the `quality.sh` script to include the desired commands, such as running YAPF on a directory. The contents of `quality.sh` would dictate exactly what happens when you run it.
+If you're asking for a specific content or functionality inside `code-quality.sh` related to YAPF or other code quality tools, you would need to edit the `code-quality.sh` script to include the desired commands, such as running YAPF on a directory. The contents of `code-quality.sh` would dictate exactly what happens when you run it.
## 📄 license
diff --git a/README.md b/README.md
index 2b104caf..d60a110a 100644
--- a/README.md
+++ b/README.md
@@ -34,28 +34,35 @@ Run example in Collab:
+
+Welcome to the documentation for the `GPT4VisionAPI` module! This module is a powerful wrapper for the OpenAI GPT-4 Vision model. It allows you to interact with the model to generate descriptions or answers related to images. This documentation will provide you with comprehensive information on how to use this module effectively.
+
+## Installation
+
+Before you start using the `GPT4VisionAPI` module, make sure you have the required dependencies installed. You can install them using the following command:
+
+```bash
+pip3 install --upgrade swarms
+```
-## Installation
+## Module Overview
-To use the GPT4Vision Model API, you need to install the required dependencies and configure your environment. Follow these steps to get started:
+The `GPT4VisionAPI` module serves as a bridge between your application and the OpenAI GPT-4 Vision model. It allows you to send requests to the model and retrieve responses related to images. Here are some key features and functionality provided by this module:
-1. Install the required Python package:
+- Encoding images to base64 format.
+- Running the GPT-4 Vision model with specified tasks and images.
+- Customization options such as setting the OpenAI API key and maximum token limit.
- ```bash
- pip3 install --upgrade swarms
- ```
+## Class: GPT4VisionAPI
-2. Make sure you have an OpenAI API key. You can obtain one by signing up on the [OpenAI platform](https://beta.openai.com/signup/).
+The `GPT4VisionAPI` class is the core component of this module. It encapsulates the functionality required to interact with the GPT-4 Vision model. Below, we'll dive into the class in detail.
-3. Set your OpenAI API key as an environment variable. You can do this in your code or your environment configuration. Alternatively, you can provide the API key directly when initializing the `GPT4Vision` class.
+### Initialization
-## Initialization
+When initializing the `GPT4VisionAPI` class, you have the option to provide the OpenAI API key and set the maximum token limit. Here are the parameters and their descriptions:
-To start using the GPT4Vision Model API, you need to create an instance of the `GPT4Vision` class. You can customize its behavior by providing various configuration options, but it also comes with sensible defaults.
+| Parameter | Type | Default Value | Description |
+|---------------------|----------|-------------------------------|----------------------------------------------------------------------------------------------------------|
+| openai_api_key | str | `OPENAI_API_KEY` environment variable (if available) | The OpenAI API key. If not provided, it defaults to the `OPENAI_API_KEY` environment variable. |
+| max_tokens | int | 300 | The maximum number of tokens to generate in the model's response. |
-Here's how you can initialize the `GPT4Vision` class:
+Here's how you can initialize the `GPT4VisionAPI` class:
```python
-from swarms.models.gpt4v import GPT4Vision
-
-gpt4vision = GPT4Vision(
- api_key="Your Key"
-)
-```
-
-The above code initializes the `GPT4Vision` class with default settings. You can adjust these settings as needed.
-
-## Methods
-
-### `process_img`
+from swarms.models import GPT4VisionAPI
-The `process_img` method is used to preprocess an image before sending it to the GPT-4 Vision model. It takes the image path as input and returns the processed image in a format suitable for API requests.
+# Initialize with default API key and max_tokens
+api = GPT4VisionAPI()
-```python
-processed_img = gpt4vision.process_img(img_path)
+# Initialize with custom API key and max_tokens
+custom_api_key = "your_custom_api_key"
+api = GPT4VisionAPI(openai_api_key=custom_api_key, max_tokens=500)
```
-- `img_path` (str): The file path or URL of the image to be processed.
+### Methods
-### `__call__`
+#### encode_image
-The `__call__` method is the main method for interacting with the GPT-4 Vision model. It sends the image and tasks to the model and returns the generated response.
+This method allows you to encode an image from a URL to base64 format. It's a utility function used internally by the module.
```python
-response = gpt4vision(img, tasks)
-```
-
-- `img` (Union[str, List[str]]): Either a single image URL or a list of image URLs to be used for the API request.
-- `tasks` (List[str]): A list of tasks or questions related to the image(s).
-
-This method returns a `GPT4VisionResponse` object, which contains the generated answer.
-
-### `run`
+def encode_image(img: str) -> str:
+ """
+ Encode image to base64.
-The `run` method is an alternative way to interact with the GPT-4 Vision model. It takes a single task and image URL as input and returns the generated response.
+ Parameters:
+ - img (str): URL of the image to encode.
-```python
-response = gpt4vision.run(task, img)
+ Returns:
+ str: Base64 encoded image.
+ """
```
-- `task` (str): The task or question related to the image.
-- `img` (str): The image URL to be used for the API request.
-
-This method simplifies interactions when dealing with a single task and image.
-
-### `arun`
+#### run
-The `arun` method is an asynchronous version of the `run` method. It allows for asynchronous processing of API requests, which can be useful in certain scenarios.
+The `run` method is the primary way to interact with the GPT-4 Vision model. It sends a request to the model with a task and an image URL, and it returns the model's response.
```python
-import asyncio
+def run(task: str, img: str) -> str:
+ """
+ Run the GPT-4 Vision model.
-async def main():
- response = await gpt4vision.arun(task, img)
- print(response)
+ Parameters:
+ - task (str): The task or question related to the image.
+ - img (str): URL of the image to analyze.
-loop = asyncio.get_event_loop()
-loop.run_until_complete(main())
+ Returns:
+ str: The model's response.
+ """
```
-- `task` (str): The task or question related to the image.
-- `img` (str): The image URL to be used for the API request.
-
-## Configuration Options
-
-The `GPT4Vision` class provides several configuration options that allow you to customize its behavior:
+#### __call__
-- `max_retries` (int): The maximum number of retries to make to the API. Default: 3
-- `backoff_factor` (float): The backoff factor to use for exponential backoff. Default: 2.0
-- `timeout_seconds` (int): The timeout in seconds for the API request. Default: 10
-- `api_key` (str): The API key to use for the API request. Default: None (set via environment variable)
-- `quality` (str): The quality of the image to generate. Options: 'low' or 'high'. Default: 'low'
-- `max_tokens` (int): The maximum number of tokens to use for the API request. Default: 200
-
-## Usage Examples
-
-### Example 1: Generating Image Descriptions
-
-```python
-gpt4vision = GPT4Vision()
-img = "https://example.com/image.jpg"
-tasks = ["Describe this image."]
-response = gpt4vision(img, tasks)
-print(response.answer)
-```
-
-In this example, we create an instance of `GPT4Vision`, provide an image URL, and ask the model to describe the image. The response contains the generated description.
-
-### Example 2: Custom Configuration
+The `__call__` method is a convenient way to run the GPT-4 Vision model. It has the same functionality as the `run` method.
```python
-custom_config = {
- "max_retries": 5,
- "timeout_seconds": 20,
- "quality": "high",
- "max_tokens": 300,
-}
-gpt4vision = GPT4Vision(**custom_config)
-img = "https://example.com/another_image.jpg"
-tasks = ["What objects can you identify in this image?"]
-response = gpt4vision(img, tasks)
-print(response.answer)
-```
+def __call__(task: str, img: str) -> str:
+ """
+ Run the GPT-4 Vision model (callable).
-In this example, we create an instance of `GPT4Vision` with custom configuration options. We set a higher timeout, request high-quality images, and allow more tokens in the response.
+ Parameters:
+ - task (str): The task or question related to the image.
+ - img (str): URL of the image to analyze.
-### Example 3: Using the `run` Method
-```python
-gpt4vision = GPT4Vision()
-img = "https://example.com/image.jpg"
-task = "Describe this image in detail."
-response = gpt4vision.run(task, img)
-print(response)
+ Returns:
+ str: The model's response.
+ """
```
-In this example, we use the `run` method to simplify the interaction by providing a single task and image URL.
+## Examples
-# Model Usage and Image Understanding
+Let's explore some usage examples of the `GPT4VisionAPI` module to better understand how to use it effectively.
-The GPT-4 Vision model processes images in a unique way, allowing it to answer questions about both or each of the images independently. Here's an overview:
+### Example 1: Basic Usage
-| Purpose | Description |
-| --------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
-| Image Understanding | The model is shown two copies of the same image and can answer questions about both or each of the images independently. |
+In this example, we'll use the module with the default API key and maximum tokens to analyze an image.
-# Image Detail Control
+```python
+from swarms.models import GPT4VisionAPI
-You have control over how the model processes the image and generates textual understanding by using the `detail` parameter, which has two options: `low` and `high`.
+# Initialize with default API key and max_tokens
+api = GPT4VisionAPI()
-| Detail | Description |
-| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| low | Disables the "high-res" model. The model receives a low-res 512 x 512 version of the image and represents the image with a budget of 65 tokens. Ideal for use cases not requiring high detail. |
-| high | Enables "high-res" mode. The model first sees the low-res image and then creates detailed crops of input images as 512px squares based on the input image size. Uses a total of 129 tokens. |
+# Define the task and image URL
+task = "What is the color of the object?"
+img = "https://i.imgur.com/2M2ZGwC.jpeg"
-# Managing Images
+# Run the GPT-4 Vision model
+response = api.run(task, img)
-To use the Chat Completions API effectively, you must manage the images you pass to the model. Here are some key considerations:
+# Print the model's response
+print(response)
+```
-| Management Aspect | Description |
-| ------------------------- | ------------------------------------------------------------------------------------------------- |
-| Image Reuse | To pass the same image multiple times, include the image with each API request. |
-| Image Size Optimization | Improve latency by downsizing images to meet the expected size requirements. |
-| Image Deletion | After processing, images are deleted from OpenAI servers and not retained. No data is used for training. |
+### Example 2: Custom API Key
-# Limitations
+If you have a custom API key, you can initialize the module with it as shown in this example.
-While GPT-4 with Vision is powerful, it has some limitations:
+```python
+from swarms.models import GPT4VisionAPI
-| Limitation | Description |
-| -------------------------------------------- | --------------------------------------------------------------------------------------------------- |
-| Medical Images | Not suitable for interpreting specialized medical images like CT scans. |
-| Non-English Text | May not perform optimally when handling non-Latin alphabets, such as Japanese or Korean. |
-| Large Text in Images | Enlarge text within images for readability, but avoid cropping important details. |
-| Rotated or Upside-Down Text/Images | May misinterpret rotated or upside-down text or images. |
-| Complex Visual Elements | May struggle to understand complex graphs or text with varying colors or styles. |
-| Spatial Reasoning | Struggles with tasks requiring precise spatial localization, such as identifying chess positions. |
-| Accuracy | May generate incorrect descriptions or captions in certain scenarios. |
-| Panoramic and Fisheye Images | Struggles with panoramic and fisheye images. |
+# Initialize with custom API key and max_tokens
+custom_api_key = "your_custom_api_key"
+api = GPT4VisionAPI(openai_api_key=custom_api_key, max_tokens=500)
-# Calculating Costs
+# Define the task and image URL
+task = "What is the object in the image?"
+img = "https://i.imgur.com/3T3ZHwD.jpeg"
-Image inputs are metered and charged in tokens. The token cost depends on the image size and detail option.
+# Run the GPT-4 Vision model
+response = api.run(task, img)
-| Example | Token Cost |
-| --------------------------------------------- | ----------- |
-| 1024 x 1024 square image in detail: high mode | 765 tokens |
-| 2048 x 4096 image in detail: high mode | 1105 tokens |
-| 4096 x 8192 image in detail: low mode | 85 tokens |
+# Print the model's response
+print(response)
+```
-# FAQ
+### Example 3: Adjusting Maximum Tokens
-Here are some frequently asked questions about GPT-4 with Vision:
+You can also customize the maximum token limit when initializing the module. In this example, we set it to 1000 tokens.
-| Question | Answer |
-| -------------------------------------------- | -------------------------------------------------------------------------------------------------- |
-| Fine-Tuning Image Capabilities | No, fine-tuning the image capabilities of GPT-4 is not supported at this time. |
-| Generating Images | GPT-4 is used for understanding images, not generating them. |
-| Supported Image File Types | Supported image file types include PNG (.png), JPEG (.jpeg and .jpg), WEBP (.webp), and non-animated GIF (.gif). |
-| Image Size Limitations | Image uploads are restricted to 20MB per image. |
-| Image Deletion | Uploaded images are automatically deleted after processing by the model. |
-| Learning More | For more details about GPT-4 with Vision, refer to the GPT-4 with Vision system card. |
-| CAPTCHA Submission | CAPTCHAs are blocked for safety reasons. |
-| Rate Limits | Image processing counts toward your tokens per minute (TPM) limit. Refer to the calculating costs section for details. |
-| Image Metadata | The model does not receive image metadata. |
-| Handling Unclear Images | If an image is unclear, the model will do its best to interpret it, but results may be less accurate. |
+```python
+from swarms.models import GPT4VisionAPI
+# Initialize with default API key and custom max_tokens
+api = GPT4VisionAPI(max_tokens=1000)
+# Define the task and image URL
+task = "Describe the scene in the image."
+img = "https://i.imgur.com/4P4ZRxU.jpeg"
-## Additional Tips
+# Run the GPT-4 Vision model
+response = api.run(task, img)
-- Make sure to handle potential exceptions and errors when making API requests. The library includes retries and error handling, but it's essential to handle exceptions gracefully in your code.
-- Experiment with different configuration options to optimize the trade-off between response quality and response time based on your specific requirements.
+# Print the model's response
+print(response)
+```
-## References and Resources
+## Additional Information
-- [OpenAI Platform](https://beta.openai.com/signup/): Sign up for an OpenAI API key.
-- [OpenAI API Documentation](https://platform.openai.com/docs/api-reference/chat/create): Official API documentation for the GPT-4 Vision model.
+- If you encounter any errors or issues with the module, make sure to check your API key and internet connectivity.
+- It's recommended to handle exceptions when using the module to gracefully handle errors.
+- You can further customize the module to fit your specific use case by modifying the code as needed.
-Now you have a comprehensive understanding of the GPT4Vision Model API, its configuration options, and how to use it for various computer vision and natural language processing tasks. Start experimenting and integrating it into your projects to leverage the power of GPT-4 Vision for image-related tasks.
+## References
-# Conclusion
+- [OpenAI API Documentation](https://platform.openai.com/docs/)
-With GPT-4 Vision, you have a powerful tool for understanding and generating textual descriptions for images. By considering its capabilities, limitations, and cost calculations, you can effectively leverage this model for various image-related tasks.
\ No newline at end of file
+This documentation provides a comprehensive guide on how to use the `GPT4VisionAPI` module effectively. It covers initialization, methods, usage examples, and additional information to ensure a smooth experience when working with the GPT-4 Vision model.
\ No newline at end of file
diff --git a/example.py b/example.py
index ab496b77..06b5d5f2 100644
--- a/example.py
+++ b/example.py
@@ -1,9 +1,21 @@
+import os
+
+from dotenv import load_dotenv
+
+# Import the OpenAIChat model and the Flow struct
from swarms.models import OpenAIChat
from swarms.structs import Flow
+# Load the environment variables
+load_dotenv()
+
+# Get the API key from the environment
+api_key = os.environ.get("OPENAI_API_KEY")
+
# Initialize the language model
llm = OpenAIChat(
temperature=0.5,
+ openai_api_key=api_key,
)
diff --git a/mkdocs.yml b/mkdocs.yml
index e70e1a39..3a4e6691 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -6,9 +6,6 @@ copyright: "© APAC Corp, Inc."
extra_css:
- docs/assets/css/extra.css
extra:
- # analytics:
- # provider: google
- # property: G-QM8EDPSCB6
social:
- icon: fontawesome/solid/house
link: assets/img/SwarmsLogoIcon.png
@@ -19,15 +16,15 @@ extra:
- icon: fontawesome/brands/python
link: https://pypi.org/project/Swarms/
theme:
- name: material
- custom_dir: docs/overrides
- logo: assets/img/SwarmsLogoIcon.png
- palette:
- # Palette toggle for light mode
+ name: material
+ custom_dir: docs/overrides
+ logo: assets/img/SwarmsLogoIcon.png
+ palette:
+ # Palette toggle for light mode
- scheme: default
primary: black
toggle:
- icon: material/brightness-7
+ icon: material/brightness-7
name: Switch to dark mode
# Palette toggle for dark mode
- scheme: slate
@@ -35,14 +32,14 @@ theme:
toggle:
icon: material/brightness-4
name: Switch to light mode
- features:
- - content.code.copy
- - content.code.annotate
- - navigation.tabs
- - navigation.sections
- - navigation.expand
- - navigation.top
- - announce.dismiss
+ features:
+ - content.code.copy
+ - content.code.annotate
+ - navigation.tabs
+ - navigation.sections
+ - navigation.expand
+ - navigation.top
+ - announce.dismiss
markdown_extensions:
- pymdownx.highlight:
anchor_linenums: true
@@ -58,80 +55,77 @@ markdown_extensions:
- def_list
- footnotes
nav:
-- Home:
- - Overview: "index.md"
- - Contributing: "contributing.md"
- - Docker Container Setup: "docker_setup.md"
-- Swarms:
- - Overview: "swarms/index.md"
+ - Home:
+ - Overview: "index.md"
+ - Contributing: "contributing.md"
+ - Docker Container Setup: "docker_setup.md"
+ - Swarms:
+ - Overview: "swarms/index.md"
- swarms.swarms:
- - AbstractSwarm: "swarms/swarms/abstractswarm.md"
- - GodMode: "swarms/swarms/godmode.md"
- - Groupchat: "swarms/swarms/groupchat.md"
+ - AbstractSwarm: "swarms/swarms/abstractswarm.md"
+ - GodMode: "swarms/swarms/godmode.md"
+ - Groupchat: "swarms/swarms/groupchat.md"
- swarms.workers:
- - Overview: "swarms/workers/index.md"
- - AbstractWorker: "swarms/workers/abstract_worker.md"
+ - Overview: "swarms/workers/index.md"
+ - AbstractWorker: "swarms/workers/abstract_worker.md"
- swarms.agents:
- - AbstractAgent: "swarms/agents/abstract_agent.md"
- - OmniModalAgent: "swarms/agents/omni_agent.md"
+ - AbstractAgent: "swarms/agents/abstract_agent.md"
+ - OmniModalAgent: "swarms/agents/omni_agent.md"
- swarms.models:
- - Language:
- - Overview: "swarms/models/index.md"
- - HuggingFaceLLM: "swarms/models/huggingface.md"
- - Anthropic: "swarms/models/anthropic.md"
- - OpenAI: "swarms/models/openai.md"
- - Zephyr: "swarms/models/zephyr.md"
- - BioGPT: "swarms/models/biogpt.md"
- - MPT7B: "swarms/models/mpt.md"
- - Mistral: "swarms/models/mistral.md"
- - MultiModal:
- - Fuyu: "swarms/models/fuyu.md"
- - Vilt: "swarms/models/vilt.md"
- - Idefics: "swarms/models/idefics.md"
- - BingChat: "swarms/models/bingchat.md"
- - Kosmos: "swarms/models/kosmos.md"
- - Nougat: "swarms/models/nougat.md"
- - Dalle3: "swarms/models/dalle3.md"
- - GPT4V: "swarms/models/gpt4v.md"
- - LayoutLMDocumentQA: "swarms/models/layoutlm_document_qa.md"
- - DistilWhisperModel: "swarms/models/distilled_whisperx.md"
- - ElevenLabsText2SpeechTool: "swarms/models/elevenlabs.md"
+ - Language:
+ - Overview: "swarms/models/index.md"
+ - HuggingFaceLLM: "swarms/models/huggingface.md"
+ - Anthropic: "swarms/models/anthropic.md"
+ - OpenAI: "swarms/models/openai.md"
+ - Zephyr: "swarms/models/zephyr.md"
+ - BioGPT: "swarms/models/biogpt.md"
+ - MPT7B: "swarms/models/mpt.md"
+ - Mistral: "swarms/models/mistral.md"
+ - MultiModal:
+ - Fuyu: "swarms/models/fuyu.md"
+ - Vilt: "swarms/models/vilt.md"
+ - Idefics: "swarms/models/idefics.md"
+ - BingChat: "swarms/models/bingchat.md"
+ - Kosmos: "swarms/models/kosmos.md"
+ - Nougat: "swarms/models/nougat.md"
+ - Dalle3: "swarms/models/dalle3.md"
+ - GPT4V: "swarms/models/gpt4v.md"
+ - LayoutLMDocumentQA: "swarms/models/layoutlm_document_qa.md"
+ - DistilWhisperModel: "swarms/models/distilled_whisperx.md"
+ - ElevenLabsText2SpeechTool: "swarms/models/elevenlabs.md"
- swarms.structs:
- - Overview: "swarms/structs/overview.md"
- - AutoScaler: "swarms/swarms/autoscaler.md"
- - Flow: "swarms/structs/flow.md"
- - SequentialWorkflow: 'swarms/structs/sequential_workflow.md'
+ - Overview: "swarms/structs/overview.md"
+ - AutoScaler: "swarms/swarms/autoscaler.md"
+ - Flow: "swarms/structs/flow.md"
+ - SequentialWorkflow: 'swarms/structs/sequential_workflow.md'
- swarms.memory:
- - PineconeVectorStoreStore: "swarms/memory/pinecone.md"
- - PGVectorStore: "swarms/memory/pg.md"
- # - swarms.chunkers:
- # - BaseChunker: "swarms/chunkers/basechunker.md"
- # - PdfChunker: "swarms/chunkers/pdf_chunker.md"
-- Guides:
- - Overview: "examples/index.md"
- - Agents:
+ - PineconeVectorStoreStore: "swarms/memory/pinecone.md"
+ - PGVectorStore: "swarms/memory/pg.md"
+ - Guides:
+ - Overview: "examples/index.md"
+ - Agents:
- Flow: "examples/flow.md"
- SequentialWorkflow: "examples/reliable_autonomous_agents.md"
- OmniAgent: "examples/omni_agent.md"
- - 2O+ Autonomous Agent Blogs: "examples/ideas.md"
-- Applications:
- - CustomerSupport:
- - Overview: "applications/customer_support.md"
- - Marketing:
- - Overview: "applications/marketing_agencies.md"
-- Corporate:
- - FAQ: "corporate/faq.md"
- - Purpose: "corporate/purpose.md"
- - Roadmap: "corporate/roadmap.md"
- - Weaknesses: "corporate/failures.md"
- - Design: "corporate/design.md"
- - Flywheel: "corporate/flywheel.md"
- - Bounties: "corporate/bounties.md"
- - Metric: "corporate/metric.md"
- - Distribution: "corporate/distribution"
- - Research: "corporate/research.md"
- - Demos: "corporate/demos.md"
- - Architecture: "corporate/architecture.md"
- - Checklist: "corporate/checklist.md"
- - Hiring: "corporate/hiring.md"
- - SwarmCloud: "corporate/swarm_cloud.md"
+ - 2O+ Autonomous Agent Blogs: "examples/ideas.md"
+ - Applications:
+ - CustomerSupport:
+ - Overview: "applications/customer_support.md"
+ - Marketing:
+ - Overview: "applications/marketing_agencies.md"
+ - Corporate:
+ - FAQ: "corporate/faq.md"
+ - Purpose: "corporate/purpose.md"
+ - Roadmap: "corporate/roadmap.md"
+ - Weaknesses: "corporate/failures.md"
+ - Design: "corporate/design.md"
+ - Flywheel: "corporate/flywheel.md"
+ - Bounties: "corporate/bounties.md"
+ - Metric: "corporate/metric.md"
+ - Distribution: "corporate/distribution"
+ - Research: "corporate/research.md"
+ - Demos: "corporate/demos.md"
+ - Architecture: "corporate/architecture.md"
+ - Checklist: "corporate/checklist.md"
+ - Hiring: "corporate/hiring.md"
+ - SwarmCloud: "corporate/swarm_cloud.md"
diff --git a/multi_agent_debate.py b/multi_agent_debate.py
deleted file mode 100644
index 2bc67c8c..00000000
--- a/multi_agent_debate.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-
-from swarms.models import OpenAIChat
-from swarms.structs import Flow
-from swarms.swarms.multi_agent_collab import MultiAgentCollaboration
-
-load_dotenv()
-
-api_key = os.environ.get("OPENAI_API_KEY")
-
-# Initialize the language model
-llm = OpenAIChat(
- temperature=0.5,
- openai_api_key=api_key,
-)
-
-
-## Initialize the workflow
-flow = Flow(llm=llm, max_loops=1, dashboard=True)
-flow2 = Flow(llm=llm, max_loops=1, dashboard=True)
-flow3 = Flow(llm=llm, max_loops=1, dashboard=True)
-
-
-swarm = MultiAgentCollaboration(
- agents=[flow, flow2, flow3],
- max_iters=4,
-)
-
-swarm.run("Generate a 10,000 word blog on health and wellness.")
diff --git a/multi_modal_auto_agent.py b/multi_modal_auto_agent.py
new file mode 100644
index 00000000..3e0a05cc
--- /dev/null
+++ b/multi_modal_auto_agent.py
@@ -0,0 +1,20 @@
+from swarms.structs import Flow
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+ MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
+
+
+llm = GPT4VisionAPI()
+
+task = "What is the color of the object?"
+img = "images/swarms.jpeg"
+
+## Initialize the workflow
+flow = Flow(
+ llm=llm,
+ sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+ max_loops="auto",
+)
+
+flow.run(task=task, img=img)
diff --git a/playground/agents/revgpt_agent.py b/playground/agents/revgpt_agent.py
deleted file mode 100644
index 16a720e8..00000000
--- a/playground/agents/revgpt_agent.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import os
-from dotenv import load_dotenv
-from swarms.models.revgptV4 import RevChatGPTModel
-from swarms.workers.worker import Worker
-
-load_dotenv()
-
-config = {
- "model": os.getenv("REVGPT_MODEL"),
- "plugin_ids": [os.getenv("REVGPT_PLUGIN_IDS")],
- "disable_history": os.getenv("REVGPT_DISABLE_HISTORY") == "True",
- "PUID": os.getenv("REVGPT_PUID"),
- "unverified_plugin_domains": [
- os.getenv("REVGPT_UNVERIFIED_PLUGIN_DOMAINS")
- ],
-}
-
-llm = RevChatGPTModel(access_token=os.getenv("ACCESS_TOKEN"), **config)
-
-worker = Worker(ai_name="Optimus Prime", llm=llm)
-
-task = (
- "What were the winning boston marathon times for the past 5 years (ending"
- " in 2022)? Generate a table of the year, name, country of origin, and"
- " times."
-)
-response = worker.run(task)
-print(response)
diff --git a/playground/demos/assembly/assembly.py b/playground/demos/assembly/assembly.py
new file mode 100644
index 00000000..6722489a
--- /dev/null
+++ b/playground/demos/assembly/assembly.py
@@ -0,0 +1,24 @@
+from swarms.structs import Flow
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+ MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
+
+llm = GPT4VisionAPI()
+
+task = (
+ "Analyze this image of an assembly line and identify any issues such as"
+ " misaligned parts, defects, or deviations from the standard assembly"
+ " process. IF there is anything unsafe in the image, explain why it is"
+ " unsafe and how it could be improved."
+)
+img = "assembly_line.jpg"
+
+## Initialize the workflow
+flow = Flow(
+ llm=llm,
+ max_loops=1,
+ dashboard=True,
+)
+
+flow.run(task=task, img=img)
diff --git a/playground/demos/assembly/assembly_line.jpg b/playground/demos/assembly/assembly_line.jpg
new file mode 100644
index 00000000..df35c2e3
Binary files /dev/null and b/playground/demos/assembly/assembly_line.jpg differ
diff --git a/playground/demos/autotemp/autotemp_example.py b/playground/demos/autotemp/autotemp_example.py
index 9047268d..c5f86416 100644
--- a/playground/demos/autotemp/autotemp_example.py
+++ b/playground/demos/autotemp/autotemp_example.py
@@ -1,5 +1,5 @@
from swarms.models import OpenAIChat
-from swarms.models.autotemp import AutoTemp
+from autotemp import AutoTemp
# Your OpenAI API key
api_key = ""
diff --git a/playground/demos/blog_gen/blog_gen.py b/playground/demos/autotemp/blog_gen.py
similarity index 99%
rename from playground/demos/blog_gen/blog_gen.py
rename to playground/demos/autotemp/blog_gen.py
index 84ab240d..85079f70 100644
--- a/playground/demos/blog_gen/blog_gen.py
+++ b/playground/demos/autotemp/blog_gen.py
@@ -1,7 +1,7 @@
import os
from termcolor import colored
from swarms.models import OpenAIChat
-from swarms.models.autotemp import AutoTemp
+from autotemp import AutoTemp
from swarms.structs import SequentialWorkflow
diff --git a/playground/demos/blog_gen/blog_gen_example.py b/playground/demos/autotemp/blog_gen_example.py
similarity index 92%
rename from playground/demos/blog_gen/blog_gen_example.py
rename to playground/demos/autotemp/blog_gen_example.py
index 7cf95535..2c2f1e24 100644
--- a/playground/demos/blog_gen/blog_gen_example.py
+++ b/playground/demos/autotemp/blog_gen_example.py
@@ -1,5 +1,5 @@
import os
-from swarms.swarms.blog_gen import BlogGen
+from blog_gen import BlogGen
def main():
diff --git a/playground/demos/idea_2_img/main.py b/playground/demos/idea_2_img/main.py
new file mode 100644
index 00000000..84ce67ab
--- /dev/null
+++ b/playground/demos/idea_2_img/main.py
@@ -0,0 +1,7 @@
+"""
+Idea 2 img
+
+task -> gpt4 text -> dalle3 img -> gpt4vision img + text analyze img -> dalle3 img -> loop
+
+"""
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
diff --git a/playground/demos/jarvis_multi_modal_auto_agent/jarvis.py b/playground/demos/jarvis_multi_modal_auto_agent/jarvis.py
new file mode 100644
index 00000000..3e0a05cc
--- /dev/null
+++ b/playground/demos/jarvis_multi_modal_auto_agent/jarvis.py
@@ -0,0 +1,20 @@
+from swarms.structs import Flow
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+ MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
+
+
+llm = GPT4VisionAPI()
+
+task = "What is the color of the object?"
+img = "images/swarms.jpeg"
+
+## Initialize the workflow
+flow = Flow(
+ llm=llm,
+ sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+ max_loops="auto",
+)
+
+flow.run(task=task, img=img)
diff --git a/playground/demos/multi_modal_autonomous_agents/multi_modal_auto_agent.py b/playground/demos/multi_modal_autonomous_agents/multi_modal_auto_agent.py
index a2602706..f5462847 100644
--- a/playground/demos/multi_modal_autonomous_agents/multi_modal_auto_agent.py
+++ b/playground/demos/multi_modal_autonomous_agents/multi_modal_auto_agent.py
@@ -1,33 +1,17 @@
from swarms.structs import Flow
-from swarms.models import Idefics
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
-# Multi Modality Auto Agent
-llm = Idefics(max_length=2000)
-task = (
- "User: What is in this image?"
- " https://upload.wikimedia.org/wikipedia/commons/8/86/Id%C3%A9fix.JPG"
-)
+llm = GPT4VisionAPI()
+
+task = "What is the color of the object?"
+img = "images/swarms.jpeg"
## Initialize the workflow
flow = Flow(
llm=llm,
- max_loops=2,
+ max_loops="auto",
dashboard=True,
- # stopping_condition=None, # You can define a stopping condition as needed.
- # loop_interval=1,
- # retry_attempts=3,
- # retry_interval=1,
- # interactive=False, # Set to 'True' for interactive mode.
- # dynamic_temperature=False, # Set to 'True' for dynamic temperature handling.
)
-# out = flow.load_state("flow_state.json")
-# temp = flow.dynamic_temperature()
-# filter = flow.add_response_filter("Trump")
-out = flow.run(task)
-# out = flow.validate_response(out)
-# out = flow.analyze_feedback(out)
-# out = flow.print_history_and_memory()
-# # out = flow.save_state("flow_state.json")
-# print(out)
+flow.run(task=task, img=img)
diff --git a/playground/demos/positive_med/positive_med.py b/playground/demos/positive_med/positive_med.py
index ea0c7c4e..36fecaa1 100644
--- a/playground/demos/positive_med/positive_med.py
+++ b/playground/demos/positive_med/positive_med.py
@@ -15,26 +15,29 @@ Distribution Agent:
- Social Media posts for the article.
# TODO
+- Make prompts better
- Add shorter and better topic generator prompt
- Optimize writer prompt to create longer and more enjoyeable blogs
- Use Local Models like Storywriter
"""
+import os
+
from termcolor import colored
+
from swarms.models import OpenAIChat
-from swarms.prompts.autoblogen import (
+from swarms.prompts.autobloggen import (
+ AUTOBLOG_REVIEW_PROMPT,
DRAFT_AGENT_SYSTEM_PROMPT,
- REVIEW_PROMPT,
SOCIAL_MEDIA_SYSTEM_PROMPT_AGENT,
- TOPIC_GENERATOR,
+ TOPIC_GENERATOR_SYSTEM_PROMPT,
)
-import os
api_key = os.environ["OPENAI_API_KEY"]
llm = OpenAIChat(openai_api_key=api_key)
def get_review_prompt(article):
- prompt = REVIEW_PROMPT.replace("{{ARTICLE}}", article)
+ prompt = AUTOBLOG_REVIEW_PROMPT.replace("{{ARTICLE}}", article)
return prompt
@@ -50,8 +53,8 @@ topic_selection_task = (
"Generate 10 topics on gaining mental clarity using ancient practices"
)
topics = llm(
- f"Your System Instructions: {TOPIC_GENERATOR}, Your current task:"
- f" {topic_selection_task}"
+ f"Your System Instructions: {TOPIC_GENERATOR_SYSTEM_PROMPT}, Your current"
+ f" task: {topic_selection_task}"
)
dashboard = print(
diff --git a/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg b/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg
new file mode 100644
index 00000000..5e9a0fff
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/main.py b/playground/demos/swarm_of_mma_manufacturing/main.py
new file mode 100644
index 00000000..d3705418
--- /dev/null
+++ b/playground/demos/swarm_of_mma_manufacturing/main.py
@@ -0,0 +1,127 @@
+"""
+Swarm of multi modal autonomous agents for manufacturing!
+---------------------------------------------------------
+Health Security agent: Agent that monitors the health of working conditions: input image of factory output: health safety index 0.0 - 1.0 being the highest
+Quality Control agent: Agent that monitors the quality of the product: input image of product output: quality index 0.0 - 1.0 being the highest
+Productivity agent: Agent that monitors the productivity of the factory: input image of factory output: productivity index 0.0 - 1.0 being the highest
+Safety agent: Agent that monitors the safety of the factory: input image of factory output: safety index 0.0 - 1.0 being the highest
+Security agent: Agent that monitors the security of the factory: input image of factory output: security index 0.0 - 1.0 being the highest
+Sustainability agent: Agent that monitors the sustainability of the factory: input image of factory output: sustainability index 0.0 - 1.0 being the highest
+Efficiency agent: Agent that monitors the efficiency of the factory: input image of factory output: efficiency index 0.0 - 1.0 being the highest
+
+
+Flow:
+health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
+"""
+from swarms.structs import Flow
+import os
+from dotenv import load_dotenv
+from swarms.models import GPT4VisionAPI
+
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+
+
+llm = GPT4VisionAPI(
+ openai_api_key=api_key
+)
+
+assembly_line = "playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg"
+red_robots = "playground/demos/swarm_of_mma_manufacturing/red_robots.jpg"
+robots = "playground/demos/swarm_of_mma_manufacturing/robots.jpg"
+tesla_assembly_line = "playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg"
+
+
+# Define detailed prompts for each agent
+tasks = {
+ "health_safety": (
+ "Analyze the factory's working environment for health safety. Focus on"
+ " cleanliness, ventilation, spacing between workstations, and personal"
+ " protective equipment availability."
+ ),
+ "productivity": (
+ "Review the factory's workflow efficiency, machine utilization, and"
+ " employee engagement. Identify operational delays or bottlenecks."
+ ),
+ "safety": (
+ "Analyze the factory's safety measures, including fire exits, safety"
+ " signage, and emergency response equipment."
+ ),
+ "security": (
+ "Evaluate the factory's security systems, entry/exit controls, and"
+ " potential vulnerabilities."
+ ),
+ "sustainability": (
+ "Inspect the factory's sustainability practices, including waste"
+ " management, energy usage, and eco-friendly processes."
+ ),
+ "efficiency": (
+ "Assess the manufacturing process's efficiency, considering the layout,"
+ " logistics, and automation level."
+ ),
+}
+
+
+# Define prompts for each agent
+health_safety_prompt = tasks["health_safety"]
+productivity_prompt = tasks["productivity"]
+safety_prompt = tasks["safety"]
+security_prompt = tasks["security"]
+sustainability_prompt = tasks["sustainability"]
+efficiency_prompt = tasks["efficiency"]
+
+
+# Health security agent
+health_security_agent = Flow(
+    llm=llm,
+    sop=health_safety_prompt,  # one prompt string, so `sop` like the sibling agents
+    max_loops=2,
+    multi_modal=True
+)
+
+# Productivity agent
+productivity_check_agent = Flow(
+ llm=llm,
+ sop=productivity_prompt,
+ max_loops=2,
+ multi_modal=True
+)
+
+# Security agent
+security_check_agent = Flow(
+ llm=llm,
+ sop=security_prompt,
+ max_loops=2,
+ multi_modal=True
+)
+
+# Efficiency agent
+efficiency_check_agent = Flow(
+ llm=llm,
+ sop=efficiency_prompt,
+ max_loops=2,
+ multi_modal=True
+)
+
+
+# Step 1: the health/safety agent inspects the robots image
+health_check = health_security_agent.run(
+    "Analyze the safety of this factory",
+    robots
+)
+
+# Step 2: the productivity agent receives step 1's output plus the assembly line image
+productivity_check = productivity_check_agent.run(
+    health_check, assembly_line
+)
+
+# Step 3: the security agent receives step 2's output (uses run, like every other stage)
+security_check = security_check_agent.run(
+    productivity_check, red_robots
+)
+
+# Step 4: the efficiency agent receives step 3's output plus the Tesla assembly image
+efficiency_check = efficiency_check_agent.run(
+    security_check, tesla_assembly_line
+)
+
diff --git a/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg b/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg
new file mode 100644
index 00000000..f086fa67
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/robots.jpg b/playground/demos/swarm_of_mma_manufacturing/robots.jpg
new file mode 100644
index 00000000..bddab6e4
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/robots.jpg differ
diff --git a/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg b/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg
new file mode 100644
index 00000000..00456f61
Binary files /dev/null and b/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg differ
diff --git a/playground/models/nougat.py b/playground/models/nougat.py
index 198fee38..97e1f1a3 100644
--- a/playground/models/nougat.py
+++ b/playground/models/nougat.py
@@ -2,4 +2,4 @@ from swarms.models.nougat import Nougat
nougat = Nougat()
-out = nougat("path/to/image.png")
+out = nougat("large.png")
diff --git a/playground/models/stable_diffusion.py b/playground/models/stable_diffusion.py
new file mode 100644
index 00000000..3bb77c39
--- /dev/null
+++ b/playground/models/stable_diffusion.py
@@ -0,0 +1,112 @@
+import os
+import base64
+import requests
+from dotenv import load_dotenv
+from typing import List
+
+load_dotenv()
+
+class StableDiffusion:
+ """
+ A class to interact with the Stable Diffusion API for image generation.
+
+ Attributes:
+ -----------
+ api_key : str
+ The API key for accessing the Stable Diffusion API.
+ api_host : str
+ The host URL of the Stable Diffusion API.
+ engine_id : str
+ The ID of the Stable Diffusion engine.
+ headers : dict
+ The headers for the API request.
+ output_dir : str
+ Directory where generated images will be saved.
+
+ Methods:
+ --------
+ generate_image(prompt: str, cfg_scale: int, height: int, width: int, samples: int, steps: int) -> List[str]:
+ Generates images based on a text prompt and returns a list of file paths to the generated images.
+ """
+
+ def __init__(self, api_key: str, api_host: str = "https://api.stability.ai"):
+ """
+ Initializes the StableDiffusion class with the provided API key and host.
+
+ Parameters:
+ -----------
+ api_key : str
+ The API key for accessing the Stable Diffusion API.
+ api_host : str
+ The host URL of the Stable Diffusion API. Default is "https://api.stability.ai".
+ """
+ self.api_key = api_key
+ self.api_host = api_host
+ self.engine_id = "stable-diffusion-v1-6"
+ self.headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ "Accept": "application/json"
+ }
+ self.output_dir = "images"
+ os.makedirs(self.output_dir, exist_ok=True)
+
+    def generate_image(self, prompt: str, cfg_scale: int = 7, height: int = 1024, width: int = 1024, samples: int = 1, steps: int = 30) -> List[str]:
+        """
+        Generates images based on a text prompt.
+
+        Parameters:
+        -----------
+        prompt : str
+            The text prompt based on which the image will be generated.
+        cfg_scale : int
+            CFG scale parameter for image generation. Default is 7.
+        height : int
+            Height of the generated image. Default is 1024.
+        width : int
+            Width of the generated image. Default is 1024.
+        samples : int
+            Number of images to generate. Default is 1.
+        steps : int
+            Number of steps for the generation process. Default is 30.
+
+        Returns:
+        --------
+        List[str]:
+            Paths of the saved images; file names are reused, so earlier output is overwritten.
+
+        Raises:
+        -------
+        Exception:
+            If the API response is not 200 (OK); requests raises its own error on timeout.
+        """
+        response = requests.post(
+            f"{self.api_host}/v1/generation/{self.engine_id}/text-to-image",
+            headers=self.headers,
+            json={
+                "text_prompts": [{"text": prompt}],
+                "cfg_scale": cfg_scale,
+                "height": height,
+                "width": width,
+                "samples": samples,
+                "steps": steps,
+            },
+            timeout=120,  # without a timeout a stalled connection blocks forever
+        )
+        if response.status_code != 200:
+            raise Exception(f"Non-200 response: {response.text}")
+
+        data = response.json()
+        image_paths = []
+        for i, image in enumerate(data["artifacts"]):
+            image_path = os.path.join(self.output_dir, f"v1_txt2img_{i}.png")
+            with open(image_path, "wb") as f:
+                f.write(base64.b64decode(image["base64"]))
+            image_paths.append(image_path)
+
+        return image_paths
+
+# Usage example:
+# sd = StableDiffusion("your-api-key")
+# images = sd.generate_image("A scenic landscape with mountains")
+# print(images)
diff --git a/pyproject.toml b/pyproject.toml
index eea95362..351442f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "swarms"
-version = "2.4.0"
+version = "2.4.5"
description = "Swarms - Pytorch"
license = "MIT"
authors = ["Kye Gomez "]
@@ -18,11 +18,11 @@ classifiers = [
"Intended Audience :: Developers",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: MIT License",
- "Programming Language :: Python :: 3.6"
+ "Programming Language :: Python :: 3.10"
]
[tool.poetry.dependencies]
-python = "^3.8.1"
+python = "^3.9.1"
torch = "2.1.1"
transformers = "*"
openai = "0.28.0"
diff --git a/swarms/__init__.py b/swarms/__init__.py
index d876c04e..9ceb78f2 100644
--- a/swarms/__init__.py
+++ b/swarms/__init__.py
@@ -1,18 +1,6 @@
-import logging
-import os
-import warnings
+from swarms.utils.disable_logging import disable_logging
-warnings.filterwarnings("ignore", category=UserWarning)
-
-# disable tensorflow warnings
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
-
-try:
- log = logging.getLogger("pytorch")
- log.propagate = False
- log.setLevel(logging.ERROR)
-except Exception as error:
- print(f"Pytorch logging not disabled: {error}")
+disable_logging()
from swarms.agents import * # noqa: E402, F403
from swarms.swarms import * # noqa: E402, F403
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index 10bf2fab..b7f3b8ce 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -20,15 +20,16 @@ from swarms.models.mpt import MPT7B # noqa: E402
# MultiModal Models
from swarms.models.idefics import Idefics # noqa: E402
-
-# from swarms.models.kosmos_two import Kosmos # noqa: E402
from swarms.models.vilt import Vilt # noqa: E402
from swarms.models.nougat import Nougat # noqa: E402
from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA # noqa: E402
+from swarms.models.gpt4_vision_api import GPT4VisionAPI # noqa: E402
# from swarms.models.gpt4v import GPT4Vision
# from swarms.models.dalle3 import Dalle3
# from swarms.models.distilled_whisperx import DistilWhisperModel # noqa: E402
+# from swarms.models.whisperx_model import WhisperX # noqa: E402
+# from swarms.models.kosmos_two import Kosmos # noqa: E402
__all__ = [
"Anthropic",
@@ -49,4 +50,6 @@ __all__ = [
"WizardLLMStoryTeller",
# "GPT4Vision",
# "Dalle3",
+ # "DistilWhisperModel",
+ "GPT4VisionAPI",
]
diff --git a/swarms/models/base_multimodal_model.py b/swarms/models/base_multimodal_model.py
new file mode 100644
index 00000000..73ec66ff
--- /dev/null
+++ b/swarms/models/base_multimodal_model.py
@@ -0,0 +1,266 @@
+from abc import abstractmethod
+import asyncio
+import base64
+import concurrent.futures
+import time
+from concurrent.futures import ThreadPoolExecutor
+from io import BytesIO
+from typing import List, Optional, Tuple
+
+import requests
+from PIL import Image
+from termcolor import colored
+
+
+class BaseMultiModalModel:
+ """
+ Base class for multimodal models
+
+
+ Args:
+ model_name (Optional[str], optional): Model name. Defaults to None.
+ temperature (Optional[int], optional): Temperature. Defaults to 0.5.
+ max_tokens (Optional[int], optional): Max tokens. Defaults to 500.
+ max_workers (Optional[int], optional): Max workers. Defaults to 10.
+ top_p (Optional[int], optional): Top p. Defaults to 1.
+ top_k (Optional[int], optional): Top k. Defaults to 50.
+ beautify (Optional[bool], optional): Beautify. Defaults to False.
+ device (Optional[str], optional): Device. Defaults to "cuda".
+ max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500.
+ retries (Optional[int], optional): Retries. Defaults to 3.
+
+ Examples:
+ >>> from swarms.models.base_multimodal_model import BaseMultiModalModel
+ >>> model = BaseMultiModalModel()
+ >>> model.run("Generate a summary of this text")
+ >>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
+ >>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
+ >>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+ >>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
+ >>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+ >>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
+ >>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+ >>> model.generate_summary("Generate a summary of this text")
+ >>> model.set_temperature(0.5)
+ >>> model.set_max_tokens(500)
+ >>> model.get_generation_time()
+ >>> model.get_chat_history()
+ >>> model.get_unique_chat_history()
+ >>> model.get_chat_history_length()
+ >>> model.get_unique_chat_history_length()
+ >>> model.get_chat_history_tokens()
+ >>> model.print_beautiful("Print this beautifully")
+ >>> model.stream("Stream this")
+ >>> model.unique_chat_history()
+ >>> model.clear_chat_history()
+ >>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/")
+
+ """
+    def __init__(
+        self,
+        model_name: Optional[str] = None,  # default matches the class docstring
+        temperature: Optional[float] = 0.5,
+        max_tokens: Optional[int] = 500,
+        max_workers: Optional[int] = 10,
+        top_p: Optional[float] = 1,
+        top_k: Optional[int] = 50,
+        beautify: Optional[bool] = False,
+        device: Optional[str] = "cuda",
+        max_new_tokens: Optional[int] = 500,
+        retries: Optional[int] = 3,
+    ):
+        self.model_name = model_name
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.max_workers = max_workers  # thread-pool size used by run_many
+        self.top_p = top_p
+        self.top_k = top_k
+        self.beautify = beautify
+        self.device = device
+        self.max_new_tokens = max_new_tokens
+        self.retries = retries  # attempt count used by the *_with_retries helpers
+        self.chat_history = []  # starts empty; reset by clear_chat_history()
+
+ @abstractmethod
+ def __call__(self, text: str, img: str):
+ """Run the model"""
+ pass
+
+ def run(self, task: str, img: str):
+ """Run the model"""
+ pass
+
+ async def arun(self, task: str, img: str):
+ """Run the model asynchronously"""
+ pass
+
+ def get_img_from_web(self, img: str):
+ """Get the image from the web"""
+ try:
+ response = requests.get(img)
+ response.raise_for_status()
+ image_pil = Image.open(BytesIO(response.content))
+ return image_pil
+ except requests.RequestException as error:
+ print(f"Error fetching image from {img} and error: {error}")
+ return None
+
+ def encode_img(self, img: str):
+ """Encode the image to base64"""
+ with open(img, "rb") as image_file:
+ return base64.b64encode(image_file.read()).decode("utf-8")
+
+ def get_img(self, img: str):
+ """Get the image from the path"""
+ image_pil = Image.open(img)
+ return image_pil
+
+ def clear_chat_history(self):
+ """Clear the chat history"""
+ self.chat_history = []
+
+    def run_many(
+        self,
+        tasks: List[str],
+        imgs: List[str],
+    ):
+        """
+        Run the model on multiple tasks and images concurrently
+
+        Args:
+            tasks (List[str]): List of tasks
+            imgs (List[str]): List of image paths, paired with tasks by position
+
+        Returns:
+            List[str]: Responses from `run`, in the same order as `tasks`
+
+        """
+        # Fan the (task, img) pairs out over a pool capped at max_workers
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            results = list(executor.map(self.run, tasks, imgs))
+
+        # Print the results for debugging
+        for result in results:
+            print(result)
+        return results
+
+ def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
+ """Process a batch of tasks and images"""
+ with concurrent.futures.ThreadPoolExecutor() as executor:
+ futures = [
+ executor.submit(self.run, task, img)
+ for task, img in tasks_images
+ ]
+ results = [future.result() for future in futures]
+ return results
+
+ async def run_batch_async(
+ self, tasks_images: List[Tuple[str, str]]
+ ) -> List[str]:
+ """Process a batch of tasks and images asynchronously"""
+ loop = asyncio.get_event_loop()
+ futures = [
+ loop.run_in_executor(None, self.run, task, img)
+ for task, img in tasks_images
+ ]
+ return await asyncio.gather(*futures)
+
+ async def run_batch_async_with_retries(
+ self, tasks_images: List[Tuple[str, str]]
+ ) -> List[str]:
+ """Process a batch of tasks and images asynchronously with retries"""
+ loop = asyncio.get_event_loop()
+ futures = [
+ loop.run_in_executor(None, self.run_with_retries, task, img)
+ for task, img in tasks_images
+ ]
+ return await asyncio.gather(*futures)
+
+ def unique_chat_history(self):
+ """Get the unique chat history"""
+ return list(set(self.chat_history))
+
+    def run_with_retries(self, task: str, img: str):
+        """Try `run` up to `self.retries` times; None if every attempt fails"""
+        for _attempt in range(self.retries):
+            try:
+                return self.run(task, img)
+            except Exception as error:
+                print(f"Error with the request {error}")
+        return None
+
+    def run_batch_with_retries(self, tasks_images: List[Tuple[str, str]]):
+        """Try `run_batch` up to `self.retries` times; None if all fail"""
+        for _attempt in range(self.retries):
+            try:
+                return self.run_batch(tasks_images)
+            except Exception as error:
+                print(f"Error with the request {error}")
+        return None
+
+ def _tokens_per_second(self) -> float:
+ """Tokens per second"""
+ elapsed_time = self.end_time - self.start_time
+ if elapsed_time == 0:
+ return float("inf")
+ return self._num_tokens() / elapsed_time
+
+    def _time_for_generation(self, task: str, img: Optional[str] = None) -> float:
+        """Run `task` (with optional `img`) once and return the elapsed seconds"""
+        self.start_time = time.time()
+        self.run(task, img)  # run() requires img; omitting it raised TypeError
+        self.end_time = time.time()
+        return self.end_time - self.start_time
+
+ @abstractmethod
+ def generate_summary(self, text: str) -> str:
+ """Generate Summary"""
+ pass
+
+ def set_temperature(self, value: float):
+ """Set Temperature"""
+ self.temperature = value
+
+ def set_max_tokens(self, value: int):
+ """Set new max tokens"""
+ self.max_tokens = value
+
+    def get_generation_time(self) -> float:
+        """Seconds taken by the last timed run, or 0 when nothing was timed"""
+        start = getattr(self, "start_time", None)  # unset until a timed run
+        end = getattr(self, "end_time", None)
+        return end - start if start and end else 0
+
+ def get_chat_history(self):
+ """Get the chat history"""
+ return self.chat_history
+
+ def get_unique_chat_history(self):
+ """Get the unique chat history"""
+ return list(set(self.chat_history))
+
+ def get_chat_history_length(self):
+ """Get the chat history length"""
+ return len(self.chat_history)
+
+ def get_unique_chat_history_length(self):
+ """Get the unique chat history length"""
+ return len(list(set(self.chat_history)))
+
+ def get_chat_history_tokens(self):
+ """Get the chat history tokens"""
+ return self._num_tokens()
+
+ def print_beautiful(self, content: str, color: str = "cyan"):
+ """Print Beautifully with termcolor"""
+ content = colored(content, color)
+ print(content)
+
+    def stream(self, content: str):
+        """Stream the output
+
+        Args:
+            content (str): Value to iterate over; each item is printed on its own line
+        """
+        for chunk in content:
+            print(chunk)
diff --git a/swarms/models/fast_vit_classes.json b/swarms/models/fast_vit_classes.json
deleted file mode 100644
index 57434253..00000000
--- a/swarms/models/fast_vit_classes.json
+++ /dev/null
@@ -1,1000 +0,0 @@
-["tench",
-"goldfish",
-"great white shark",
-"tiger shark",
-"hammerhead shark",
-"electric ray",
-"stingray",
-"cock",
-"hen",
-"ostrich",
-"brambling",
-"goldfinch",
-"house finch",
-"junco",
-"indigo bunting",
-"American robin",
-"bulbul",
-"jay",
-"magpie",
-"chickadee",
-"American dipper",
-"kite",
-"bald eagle",
-"vulture",
-"great grey owl",
-"fire salamander",
-"smooth newt",
-"newt",
-"spotted salamander",
-"axolotl",
-"American bullfrog",
-"tree frog",
-"tailed frog",
-"loggerhead sea turtle",
-"leatherback sea turtle",
-"mud turtle",
-"terrapin",
-"box turtle",
-"banded gecko",
-"green iguana",
-"Carolina anole",
-"desert grassland whiptail lizard",
-"agama",
-"frilled-necked lizard",
-"alligator lizard",
-"Gila monster",
-"European green lizard",
-"chameleon",
-"Komodo dragon",
-"Nile crocodile",
-"American alligator",
-"triceratops",
-"worm snake",
-"ring-necked snake",
-"eastern hog-nosed snake",
-"smooth green snake",
-"kingsnake",
-"garter snake",
-"water snake",
-"vine snake",
-"night snake",
-"boa constrictor",
-"African rock python",
-"Indian cobra",
-"green mamba",
-"sea snake",
-"Saharan horned viper",
-"eastern diamondback rattlesnake",
-"sidewinder",
-"trilobite",
-"harvestman",
-"scorpion",
-"yellow garden spider",
-"barn spider",
-"European garden spider",
-"southern black widow",
-"tarantula",
-"wolf spider",
-"tick",
-"centipede",
-"black grouse",
-"ptarmigan",
-"ruffed grouse",
-"prairie grouse",
-"peacock",
-"quail",
-"partridge",
-"grey parrot",
-"macaw",
-"sulphur-crested cockatoo",
-"lorikeet",
-"coucal",
-"bee eater",
-"hornbill",
-"hummingbird",
-"jacamar",
-"toucan",
-"duck",
-"red-breasted merganser",
-"goose",
-"black swan",
-"tusker",
-"echidna",
-"platypus",
-"wallaby",
-"koala",
-"wombat",
-"jellyfish",
-"sea anemone",
-"brain coral",
-"flatworm",
-"nematode",
-"conch",
-"snail",
-"slug",
-"sea slug",
-"chiton",
-"chambered nautilus",
-"Dungeness crab",
-"rock crab",
-"fiddler crab",
-"red king crab",
-"American lobster",
-"spiny lobster",
-"crayfish",
-"hermit crab",
-"isopod",
-"white stork",
-"black stork",
-"spoonbill",
-"flamingo",
-"little blue heron",
-"great egret",
-"bittern",
-"crane (bird)",
-"limpkin",
-"common gallinule",
-"American coot",
-"bustard",
-"ruddy turnstone",
-"dunlin",
-"common redshank",
-"dowitcher",
-"oystercatcher",
-"pelican",
-"king penguin",
-"albatross",
-"grey whale",
-"killer whale",
-"dugong",
-"sea lion",
-"Chihuahua",
-"Japanese Chin",
-"Maltese",
-"Pekingese",
-"Shih Tzu",
-"King Charles Spaniel",
-"Papillon",
-"toy terrier",
-"Rhodesian Ridgeback",
-"Afghan Hound",
-"Basset Hound",
-"Beagle",
-"Bloodhound",
-"Bluetick Coonhound",
-"Black and Tan Coonhound",
-"Treeing Walker Coonhound",
-"English foxhound",
-"Redbone Coonhound",
-"borzoi",
-"Irish Wolfhound",
-"Italian Greyhound",
-"Whippet",
-"Ibizan Hound",
-"Norwegian Elkhound",
-"Otterhound",
-"Saluki",
-"Scottish Deerhound",
-"Weimaraner",
-"Staffordshire Bull Terrier",
-"American Staffordshire Terrier",
-"Bedlington Terrier",
-"Border Terrier",
-"Kerry Blue Terrier",
-"Irish Terrier",
-"Norfolk Terrier",
-"Norwich Terrier",
-"Yorkshire Terrier",
-"Wire Fox Terrier",
-"Lakeland Terrier",
-"Sealyham Terrier",
-"Airedale Terrier",
-"Cairn Terrier",
-"Australian Terrier",
-"Dandie Dinmont Terrier",
-"Boston Terrier",
-"Miniature Schnauzer",
-"Giant Schnauzer",
-"Standard Schnauzer",
-"Scottish Terrier",
-"Tibetan Terrier",
-"Australian Silky Terrier",
-"Soft-coated Wheaten Terrier",
-"West Highland White Terrier",
-"Lhasa Apso",
-"Flat-Coated Retriever",
-"Curly-coated Retriever",
-"Golden Retriever",
-"Labrador Retriever",
-"Chesapeake Bay Retriever",
-"German Shorthaired Pointer",
-"Vizsla",
-"English Setter",
-"Irish Setter",
-"Gordon Setter",
-"Brittany Spaniel",
-"Clumber Spaniel",
-"English Springer Spaniel",
-"Welsh Springer Spaniel",
-"Cocker Spaniels",
-"Sussex Spaniel",
-"Irish Water Spaniel",
-"Kuvasz",
-"Schipperke",
-"Groenendael",
-"Malinois",
-"Briard",
-"Australian Kelpie",
-"Komondor",
-"Old English Sheepdog",
-"Shetland Sheepdog",
-"collie",
-"Border Collie",
-"Bouvier des Flandres",
-"Rottweiler",
-"German Shepherd Dog",
-"Dobermann",
-"Miniature Pinscher",
-"Greater Swiss Mountain Dog",
-"Bernese Mountain Dog",
-"Appenzeller Sennenhund",
-"Entlebucher Sennenhund",
-"Boxer",
-"Bullmastiff",
-"Tibetan Mastiff",
-"French Bulldog",
-"Great Dane",
-"St. Bernard",
-"husky",
-"Alaskan Malamute",
-"Siberian Husky",
-"Dalmatian",
-"Affenpinscher",
-"Basenji",
-"pug",
-"Leonberger",
-"Newfoundland",
-"Pyrenean Mountain Dog",
-"Samoyed",
-"Pomeranian",
-"Chow Chow",
-"Keeshond",
-"Griffon Bruxellois",
-"Pembroke Welsh Corgi",
-"Cardigan Welsh Corgi",
-"Toy Poodle",
-"Miniature Poodle",
-"Standard Poodle",
-"Mexican hairless dog",
-"grey wolf",
-"Alaskan tundra wolf",
-"red wolf",
-"coyote",
-"dingo",
-"dhole",
-"African wild dog",
-"hyena",
-"red fox",
-"kit fox",
-"Arctic fox",
-"grey fox",
-"tabby cat",
-"tiger cat",
-"Persian cat",
-"Siamese cat",
-"Egyptian Mau",
-"cougar",
-"lynx",
-"leopard",
-"snow leopard",
-"jaguar",
-"lion",
-"tiger",
-"cheetah",
-"brown bear",
-"American black bear",
-"polar bear",
-"sloth bear",
-"mongoose",
-"meerkat",
-"tiger beetle",
-"ladybug",
-"ground beetle",
-"longhorn beetle",
-"leaf beetle",
-"dung beetle",
-"rhinoceros beetle",
-"weevil",
-"fly",
-"bee",
-"ant",
-"grasshopper",
-"cricket",
-"stick insect",
-"cockroach",
-"mantis",
-"cicada",
-"leafhopper",
-"lacewing",
-"dragonfly",
-"damselfly",
-"red admiral",
-"ringlet",
-"monarch butterfly",
-"small white",
-"sulphur butterfly",
-"gossamer-winged butterfly",
-"starfish",
-"sea urchin",
-"sea cucumber",
-"cottontail rabbit",
-"hare",
-"Angora rabbit",
-"hamster",
-"porcupine",
-"fox squirrel",
-"marmot",
-"beaver",
-"guinea pig",
-"common sorrel",
-"zebra",
-"pig",
-"wild boar",
-"warthog",
-"hippopotamus",
-"ox",
-"water buffalo",
-"bison",
-"ram",
-"bighorn sheep",
-"Alpine ibex",
-"hartebeest",
-"impala",
-"gazelle",
-"dromedary",
-"llama",
-"weasel",
-"mink",
-"European polecat",
-"black-footed ferret",
-"otter",
-"skunk",
-"badger",
-"armadillo",
-"three-toed sloth",
-"orangutan",
-"gorilla",
-"chimpanzee",
-"gibbon",
-"siamang",
-"guenon",
-"patas monkey",
-"baboon",
-"macaque",
-"langur",
-"black-and-white colobus",
-"proboscis monkey",
-"marmoset",
-"white-headed capuchin",
-"howler monkey",
-"titi",
-"Geoffroy's spider monkey",
-"common squirrel monkey",
-"ring-tailed lemur",
-"indri",
-"Asian elephant",
-"African bush elephant",
-"red panda",
-"giant panda",
-"snoek",
-"eel",
-"coho salmon",
-"rock beauty",
-"clownfish",
-"sturgeon",
-"garfish",
-"lionfish",
-"pufferfish",
-"abacus",
-"abaya",
-"academic gown",
-"accordion",
-"acoustic guitar",
-"aircraft carrier",
-"airliner",
-"airship",
-"altar",
-"ambulance",
-"amphibious vehicle",
-"analog clock",
-"apiary",
-"apron",
-"waste container",
-"assault rifle",
-"backpack",
-"bakery",
-"balance beam",
-"balloon",
-"ballpoint pen",
-"Band-Aid",
-"banjo",
-"baluster",
-"barbell",
-"barber chair",
-"barbershop",
-"barn",
-"barometer",
-"barrel",
-"wheelbarrow",
-"baseball",
-"basketball",
-"bassinet",
-"bassoon",
-"swimming cap",
-"bath towel",
-"bathtub",
-"station wagon",
-"lighthouse",
-"beaker",
-"military cap",
-"beer bottle",
-"beer glass",
-"bell-cot",
-"bib",
-"tandem bicycle",
-"bikini",
-"ring binder",
-"binoculars",
-"birdhouse",
-"boathouse",
-"bobsleigh",
-"bolo tie",
-"poke bonnet",
-"bookcase",
-"bookstore",
-"bottle cap",
-"bow",
-"bow tie",
-"brass",
-"bra",
-"breakwater",
-"breastplate",
-"broom",
-"bucket",
-"buckle",
-"bulletproof vest",
-"high-speed train",
-"butcher shop",
-"taxicab",
-"cauldron",
-"candle",
-"cannon",
-"canoe",
-"can opener",
-"cardigan",
-"car mirror",
-"carousel",
-"tool kit",
-"carton",
-"car wheel",
-"automated teller machine",
-"cassette",
-"cassette player",
-"castle",
-"catamaran",
-"CD player",
-"cello",
-"mobile phone",
-"chain",
-"chain-link fence",
-"chain mail",
-"chainsaw",
-"chest",
-"chiffonier",
-"chime",
-"china cabinet",
-"Christmas stocking",
-"church",
-"movie theater",
-"cleaver",
-"cliff dwelling",
-"cloak",
-"clogs",
-"cocktail shaker",
-"coffee mug",
-"coffeemaker",
-"coil",
-"combination lock",
-"computer keyboard",
-"confectionery store",
-"container ship",
-"convertible",
-"corkscrew",
-"cornet",
-"cowboy boot",
-"cowboy hat",
-"cradle",
-"crane (machine)",
-"crash helmet",
-"crate",
-"infant bed",
-"Crock Pot",
-"croquet ball",
-"crutch",
-"cuirass",
-"dam",
-"desk",
-"desktop computer",
-"rotary dial telephone",
-"diaper",
-"digital clock",
-"digital watch",
-"dining table",
-"dishcloth",
-"dishwasher",
-"disc brake",
-"dock",
-"dog sled",
-"dome",
-"doormat",
-"drilling rig",
-"drum",
-"drumstick",
-"dumbbell",
-"Dutch oven",
-"electric fan",
-"electric guitar",
-"electric locomotive",
-"entertainment center",
-"envelope",
-"espresso machine",
-"face powder",
-"feather boa",
-"filing cabinet",
-"fireboat",
-"fire engine",
-"fire screen sheet",
-"flagpole",
-"flute",
-"folding chair",
-"football helmet",
-"forklift",
-"fountain",
-"fountain pen",
-"four-poster bed",
-"freight car",
-"French horn",
-"frying pan",
-"fur coat",
-"garbage truck",
-"gas mask",
-"gas pump",
-"goblet",
-"go-kart",
-"golf ball",
-"golf cart",
-"gondola",
-"gong",
-"gown",
-"grand piano",
-"greenhouse",
-"grille",
-"grocery store",
-"guillotine",
-"barrette",
-"hair spray",
-"half-track",
-"hammer",
-"hamper",
-"hair dryer",
-"hand-held computer",
-"handkerchief",
-"hard disk drive",
-"harmonica",
-"harp",
-"harvester",
-"hatchet",
-"holster",
-"home theater",
-"honeycomb",
-"hook",
-"hoop skirt",
-"horizontal bar",
-"horse-drawn vehicle",
-"hourglass",
-"iPod",
-"clothes iron",
-"jack-o'-lantern",
-"jeans",
-"jeep",
-"T-shirt",
-"jigsaw puzzle",
-"pulled rickshaw",
-"joystick",
-"kimono",
-"knee pad",
-"knot",
-"lab coat",
-"ladle",
-"lampshade",
-"laptop computer",
-"lawn mower",
-"lens cap",
-"paper knife",
-"library",
-"lifeboat",
-"lighter",
-"limousine",
-"ocean liner",
-"lipstick",
-"slip-on shoe",
-"lotion",
-"speaker",
-"loupe",
-"sawmill",
-"magnetic compass",
-"mail bag",
-"mailbox",
-"tights",
-"tank suit",
-"manhole cover",
-"maraca",
-"marimba",
-"mask",
-"match",
-"maypole",
-"maze",
-"measuring cup",
-"medicine chest",
-"megalith",
-"microphone",
-"microwave oven",
-"military uniform",
-"milk can",
-"minibus",
-"miniskirt",
-"minivan",
-"missile",
-"mitten",
-"mixing bowl",
-"mobile home",
-"Model T",
-"modem",
-"monastery",
-"monitor",
-"moped",
-"mortar",
-"square academic cap",
-"mosque",
-"mosquito net",
-"scooter",
-"mountain bike",
-"tent",
-"computer mouse",
-"mousetrap",
-"moving van",
-"muzzle",
-"nail",
-"neck brace",
-"necklace",
-"nipple",
-"notebook computer",
-"obelisk",
-"oboe",
-"ocarina",
-"odometer",
-"oil filter",
-"organ",
-"oscilloscope",
-"overskirt",
-"bullock cart",
-"oxygen mask",
-"packet",
-"paddle",
-"paddle wheel",
-"padlock",
-"paintbrush",
-"pajamas",
-"palace",
-"pan flute",
-"paper towel",
-"parachute",
-"parallel bars",
-"park bench",
-"parking meter",
-"passenger car",
-"patio",
-"payphone",
-"pedestal",
-"pencil case",
-"pencil sharpener",
-"perfume",
-"Petri dish",
-"photocopier",
-"plectrum",
-"Pickelhaube",
-"picket fence",
-"pickup truck",
-"pier",
-"piggy bank",
-"pill bottle",
-"pillow",
-"ping-pong ball",
-"pinwheel",
-"pirate ship",
-"pitcher",
-"hand plane",
-"planetarium",
-"plastic bag",
-"plate rack",
-"plow",
-"plunger",
-"Polaroid camera",
-"pole",
-"police van",
-"poncho",
-"billiard table",
-"soda bottle",
-"pot",
-"potter's wheel",
-"power drill",
-"prayer rug",
-"printer",
-"prison",
-"projectile",
-"projector",
-"hockey puck",
-"punching bag",
-"purse",
-"quill",
-"quilt",
-"race car",
-"racket",
-"radiator",
-"radio",
-"radio telescope",
-"rain barrel",
-"recreational vehicle",
-"reel",
-"reflex camera",
-"refrigerator",
-"remote control",
-"restaurant",
-"revolver",
-"rifle",
-"rocking chair",
-"rotisserie",
-"eraser",
-"rugby ball",
-"ruler",
-"running shoe",
-"safe",
-"safety pin",
-"salt shaker",
-"sandal",
-"sarong",
-"saxophone",
-"scabbard",
-"weighing scale",
-"school bus",
-"schooner",
-"scoreboard",
-"CRT screen",
-"screw",
-"screwdriver",
-"seat belt",
-"sewing machine",
-"shield",
-"shoe store",
-"shoji",
-"shopping basket",
-"shopping cart",
-"shovel",
-"shower cap",
-"shower curtain",
-"ski",
-"ski mask",
-"sleeping bag",
-"slide rule",
-"sliding door",
-"slot machine",
-"snorkel",
-"snowmobile",
-"snowplow",
-"soap dispenser",
-"soccer ball",
-"sock",
-"solar thermal collector",
-"sombrero",
-"soup bowl",
-"space bar",
-"space heater",
-"space shuttle",
-"spatula",
-"motorboat",
-"spider web",
-"spindle",
-"sports car",
-"spotlight",
-"stage",
-"steam locomotive",
-"through arch bridge",
-"steel drum",
-"stethoscope",
-"scarf",
-"stone wall",
-"stopwatch",
-"stove",
-"strainer",
-"tram",
-"stretcher",
-"couch",
-"stupa",
-"submarine",
-"suit",
-"sundial",
-"sunglass",
-"sunglasses",
-"sunscreen",
-"suspension bridge",
-"mop",
-"sweatshirt",
-"swimsuit",
-"swing",
-"switch",
-"syringe",
-"table lamp",
-"tank",
-"tape player",
-"teapot",
-"teddy bear",
-"television",
-"tennis ball",
-"thatched roof",
-"front curtain",
-"thimble",
-"threshing machine",
-"throne",
-"tile roof",
-"toaster",
-"tobacco shop",
-"toilet seat",
-"torch",
-"totem pole",
-"tow truck",
-"toy store",
-"tractor",
-"semi-trailer truck",
-"tray",
-"trench coat",
-"tricycle",
-"trimaran",
-"tripod",
-"triumphal arch",
-"trolleybus",
-"trombone",
-"tub",
-"turnstile",
-"typewriter keyboard",
-"umbrella",
-"unicycle",
-"upright piano",
-"vacuum cleaner",
-"vase",
-"vault",
-"velvet",
-"vending machine",
-"vestment",
-"viaduct",
-"violin",
-"volleyball",
-"waffle iron",
-"wall clock",
-"wallet",
-"wardrobe",
-"military aircraft",
-"sink",
-"washing machine",
-"water bottle",
-"water jug",
-"water tower",
-"whiskey jug",
-"whistle",
-"wig",
-"window screen",
-"window shade",
-"Windsor tie",
-"wine bottle",
-"wing",
-"wok",
-"wooden spoon",
-"wool",
-"split-rail fence",
-"shipwreck",
-"yawl",
-"yurt",
-"website",
-"comic book",
-"crossword",
-"traffic sign",
-"traffic light",
-"dust jacket",
-"menu",
-"plate",
-"guacamole",
-"consomme",
-"hot pot",
-"trifle",
-"ice cream",
-"ice pop",
-"baguette",
-"bagel",
-"pretzel",
-"cheeseburger",
-"hot dog",
-"mashed potato",
-"cabbage",
-"broccoli",
-"cauliflower",
-"zucchini",
-"spaghetti squash",
-"acorn squash",
-"butternut squash",
-"cucumber",
-"artichoke",
-"bell pepper",
-"cardoon",
-"mushroom",
-"Granny Smith",
-"strawberry",
-"orange",
-"lemon",
-"fig",
-"pineapple",
-"banana",
-"jackfruit",
-"custard apple",
-"pomegranate",
-"hay",
-"carbonara",
-"chocolate syrup",
-"dough",
-"meatloaf",
-"pizza",
-"pot pie",
-"burrito",
-"red wine",
-"espresso",
-"cup",
-"eggnog",
-"alp",
-"bubble",
-"cliff",
-"coral reef",
-"geyser",
-"lakeshore",
-"promontory",
-"shoal",
-"seashore",
-"valley",
-"volcano",
-"baseball player",
-"bridegroom",
-"scuba diver",
-"rapeseed",
-"daisy",
-"yellow lady's slipper",
-"corn",
-"acorn",
-"rose hip",
-"horse chestnut seed",
-"coral fungus",
-"agaric",
-"gyromitra",
-"stinkhorn mushroom",
-"earth star",
-"hen-of-the-woods",
-"bolete",
-"ear of corn",
-"toilet paper"]
\ No newline at end of file
diff --git a/swarms/models/fuyu.py b/swarms/models/fuyu.py
index ed955260..79dc1c47 100644
--- a/swarms/models/fuyu.py
+++ b/swarms/models/fuyu.py
@@ -63,9 +63,9 @@ class Fuyu:
def __call__(self, text: str, img: str):
"""Call the model with text and img paths"""
- image_pil = Image.open(img)
+ img = self.get_img(img)
model_inputs = self.processor(
- text=text, images=[image_pil], device=self.device_map
+ text=text, images=[img], device=self.device_map
)
for k, v in model_inputs.items():
@@ -79,13 +79,13 @@ class Fuyu:
)
return print(str(text))
- def get_img_from_web(self, img_url: str):
+ def get_img_from_web(self, img: str):
"""Get the image from the web"""
try:
- response = requests.get(img_url)
+ response = requests.get(img)
response.raise_for_status()
image_pil = Image.open(BytesIO(response.content))
return image_pil
except requests.RequestException as error:
- print(f"Error fetching image from {img_url} and error: {error}")
+ print(f"Error fetching image from {img} and error: {error}")
return None
diff --git a/swarms/models/gpt4_vision_api.py b/swarms/models/gpt4_vision_api.py
new file mode 100644
index 00000000..0370b2c2
--- /dev/null
+++ b/swarms/models/gpt4_vision_api.py
@@ -0,0 +1,421 @@
+import asyncio
+import base64
+import concurrent.futures
+import json
+import logging
+import os
+from concurrent.futures import ThreadPoolExecutor
+from typing import List, Optional, Tuple
+
+import aiohttp
+import requests
+from dotenv import load_dotenv
+from termcolor import colored
+
+try:
+ import cv2
+except ImportError:
+ print("OpenCV not installed. Please install OpenCV to use this model.")
+ raise ImportError
+
+# Load environment variables
+load_dotenv()
+openai_api_key = os.getenv("OPENAI_API_KEY")
+
+
+class GPT4VisionAPI:
+ """
+ GPT-4 Vision API
+
+ This class is a wrapper for the OpenAI API. It is used to run the GPT-4 Vision model.
+
+ Parameters
+ ----------
+ openai_api_key : str
+ The OpenAI API key. Defaults to the OPENAI_API_KEY environment variable.
+ max_tokens : int
+ The maximum number of tokens to generate. Defaults to 300.
+
+
+ Methods
+ -------
+ encode_image(img: str)
+ Encode image to base64.
+ run(task: str, img: str)
+ Run the model.
+ __call__(task: str, img: str)
+ Run the model.
+
+ Examples:
+ ---------
+ >>> from swarms.models import GPT4VisionAPI
+ >>> llm = GPT4VisionAPI()
+ >>> task = "What is the color of the object?"
+ >>> img = "https://i.imgur.com/2M2ZGwC.jpeg"
+ >>> llm.run(task, img)
+
+
+ """
+
+    def __init__(
+        self,
+        openai_api_key: str = openai_api_key,
+        model_name: str = "gpt-4-vision-preview",
+        logging_enabled: bool = False,
+        max_workers: int = 10,
+        max_tokens: int = 300,
+        openai_proxy: str = "https://api.openai.com/v1/chat/completions",
+        beautify: bool = False,
+        streaming_enabled: Optional[bool] = False,
+    ):
+        super().__init__()
+        self.openai_api_key = openai_api_key
+        self.logging_enabled = logging_enabled
+        self.model_name = model_name
+        self.max_workers = max_workers
+        self.max_tokens = max_tokens
+        self.openai_proxy = openai_proxy
+        self.beautify = beautify
+        self.streaming_enabled = streaming_enabled
+        # DEBUG logging when enabled; otherwise quiet the HTTP library loggers
+        if self.logging_enabled:
+            logging.basicConfig(level=logging.DEBUG)
+        else:
+            # Disable debug logs for requests and urllib3
+            logging.getLogger("requests").setLevel(logging.WARNING)
+            logging.getLogger("urllib3").setLevel(logging.WARNING)
+
+    def encode_image(self, img: str):
+        """Return the file at path ``img`` as a base64-encoded UTF-8 string."""
+        with open(img, "rb") as fh:
+            return base64.b64encode(fh.read()).decode("utf-8")
+
+    def download_img_then_encode(self, img: str):
+        """Placeholder for downloading ``img`` from a URL and base64-encoding it; not implemented, returns None."""
+        pass
+
+    # Function to handle vision tasks
+    def run(self, task: Optional[str] = None, img: Optional[str] = None, *args, **kwargs):
+        """Send `task` plus the local image file `img` to the chat endpoint.
+
+        Prints the reply and returns its text; errors are printed, re-raised."""
+        try:
+            base64_image = self.encode_image(img)
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.openai_api_key}",
+            }
+            payload = {
+                "model": self.model_name,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": task},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}"
+                                },
+                            },
+                        ],
+                    }
+                ],
+                "max_tokens": self.max_tokens,
+            }
+            response = requests.post(
+                self.openai_proxy,
+                headers=headers,
+                json=payload,
+            )
+
+            out = response.json()
+            content = out["choices"][0]["message"]["content"]
+
+            if self.streaming_enabled:
+                # stream_response only prints (returns None): don't reassign
+                self.stream_response(content)
+            elif self.beautify:
+                # Colour just the printed copy; callers get the raw text
+                print(colored(content, "cyan"))
+            else:
+                print(content)
+
+            return content
+
+        except Exception as error:
+            print(f"Error with the request: {error}")
+            raise
+
+    def video_prompt(self, frames):
+        """
+        Build the text prompt that asks for a description of a video, with
+        the given frames interpolated into the prompt text.
+
+        Parameters
+        ----------
+        frames : list
+            A list of base64 frames
+
+        Returns
+        -------
+        PROMPT : str
+            The prompt text with ``frames`` embedded
+
+        Examples
+        --------
+
+        >>> from swarms.models import GPT4VisionAPI
+        >>> llm = GPT4VisionAPI()
+        >>> video = "video.mp4"
+        >>> base64_frames = llm.process_video(video)
+        >>> prompt = llm.video_prompt(base64_frames)
+        >>> print(prompt)
+
+        """
+        PROMPT = f"""
+        These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
+
+        {frames}
+        """
+        return PROMPT
+
+    def stream_response(self, content: str):
+        """Print each element of ``content`` on its own line.
+
+        Args:
+            content (str): Iterable to print; a str is printed one character per line.
+        """
+        for chunk in content:
+            print(chunk)
+
+    def process_video(self, video: str):
+        """
+        Process a video into a list of base64 frames
+
+        Parameters
+        ----------
+        video : str
+            The path to the video file
+
+        Returns
+        -------
+        base64_frames : list
+            A list of base64 frames
+
+        Examples
+        --------
+        >>> from swarms.models import GPT4VisionAPI
+        >>> llm = GPT4VisionAPI()
+        >>> video = "video.mp4"
+        >>> base64_frames = llm.process_video(video)
+
+        """
+        video = cv2.VideoCapture(video)
+
+        base64_frames = []
+        while video.isOpened():
+            success, frame = video.read()
+            if not success:
+                break
+            _, buffer = cv2.imencode(".jpg", frame)
+            base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
+
+        video.release()
+        print(len(base64_frames), "frames read.")
+
+        # Return the frames as the docstring promises (previously discarded)
+        return base64_frames
+
+    def __call__(self, task: str, img: str):
+        """Send `task` plus the local image file `img` to the chat endpoint.
+
+        Prints the reply and returns its text; errors are printed, re-raised."""
+        try:
+            base64_image = self.encode_image(img)
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.openai_api_key}",
+            }
+            payload = {
+                "model": self.model_name,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": task},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}"
+                                },
+                            },
+                        ],
+                    }
+                ],
+                "max_tokens": self.max_tokens,
+            }
+            response = requests.post(
+                self.openai_proxy,
+                headers=headers,
+                json=payload,
+            )
+
+            out = response.json()
+            content = out["choices"][0]["message"]["content"]
+
+            if self.streaming_enabled:
+                # stream_response only prints (returns None): don't reassign
+                self.stream_response(content)
+            elif self.beautify:
+                # Colour just the printed copy; callers get the raw text
+                print(colored(content, "cyan"))
+            else:
+                print(content)
+
+            return content
+
+        except Exception as error:
+            print(f"Error with the request: {error}")
+            raise
+
+    def run_many(
+        self,
+        tasks: List[str],
+        imgs: List[str],
+    ):
+        """
+        Run the model on multiple tasks and images all at once using concurrent
+
+        Args:
+            tasks (List[str]): List of tasks
+            imgs (List[str]): List of image paths
+
+        Returns:
+            List[str]: List of responses
+
+
+        """
+        # Instantiate the thread pool executor
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            # Materialise once: executor.map yields a one-shot iterator
+            results = list(executor.map(self.run, tasks, imgs))
+
+            # Print the results for debugging
+            for result in results:
+                print(result)
+            return results
+
+    async def arun(
+        self,
+        task: Optional[str] = None,
+        img: Optional[str] = None,
+    ):
+        """
+        Asynchronously run the model
+
+        Overview:
+        ---------
+        Asynchronously sends one task and one local image file to the chat
+        endpoint, prints the reply and returns its text.
+
+        Parameters:
+        ----------
+        task : str
+            The task to run the model on.
+        img : str
+            The image to run the task on
+
+        """
+        try:
+            base64_image = self.encode_image(img)
+            headers = {
+                "Content-Type": "application/json",
+                # Use the key given to the constructor, not the module global
+                "Authorization": f"Bearer {self.openai_api_key}",
+            }
+            payload = {
+                "model": self.model_name,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": task},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}"
+                                },
+                            },
+                        ],
+                    }
+                ],
+                "max_tokens": self.max_tokens,
+            }
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.openai_proxy, headers=headers, data=json.dumps(payload)
+                ) as response:
+                    out = await response.json()
+                    content = out["choices"][0]["message"]["content"]
+                    print(content)
+                    return content
+        except Exception as error:
+            print(f"Error with the request {error}")
+            raise error
+
+    def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
+        """Run every (task, image) pair on a thread pool; results keep input order"""
+        with concurrent.futures.ThreadPoolExecutor() as pool:
+            # Submit everything up front so the calls can overlap
+            pending = []
+            for task, img in tasks_images:
+                pending.append(pool.submit(self.run, task, img))
+        # The pool has shut down here, so every future is already finished
+        return [job.result() for job in pending]
+
+    async def run_batch_async(
+        self, tasks_images: List[Tuple[str, str]]
+    ) -> List[str]:
+        """Run each (task, image) pair via self.run on the default executor"""
+        loop = asyncio.get_running_loop()
+        # run_in_executor(None, ...) uses the loop's default executor
+        jobs = []
+        for task, img in tasks_images:
+            jobs.append(loop.run_in_executor(None, self.run, task, img))
+        return await asyncio.gather(*jobs)
+
+    async def run_batch_async_with_retries(
+        self, tasks_images: List[Tuple[str, str]]
+    ) -> List[str]:
+        """Process a batch of tasks and images asynchronously with retries"""
+        # No run_with_retries exists on this class yet; fall back to run
+        runner = getattr(self, "run_with_retries", self.run)
+        loop = asyncio.get_event_loop()
+        return await asyncio.gather(
+            *(loop.run_in_executor(None, runner, t, i) for t, i in tasks_images)
+        )
+
+    def health_check(self):
+        """Return whether a GET to /v1/engines (sent without an API key) answers HTTP 200; request errors are printed and yield False."""
+        try:
+            response = requests.get("https://api.openai.com/v1/engines")
+            return response.status_code == 200
+        except requests.RequestException as error:
+            print(f"Health check failed: {error}")
+            return False
+
+    def print_dashboard(self):
+        """Print the model/worker/proxy summary in green and return its text"""
+        dashboard = colored(
+            f"""
+            GPT4Vision Dashboard
+            -------------------
+            Model: {self.model_name}
+            Max Workers: {self.max_workers}
+            OpenAIProxy: {self.openai_proxy}
+            """,
+            "green",
+        )
+        print(dashboard)
+        return dashboard
diff --git a/swarms/models/kosmos_two.py b/swarms/models/kosmos_two.py
index c696ef34..99998287 100644
--- a/swarms/models/kosmos_two.py
+++ b/swarms/models/kosmos_two.py
@@ -18,38 +18,31 @@ def is_overlapping(rect1, rect2):
class Kosmos:
"""
+ Kosmos model by Yen-Chun Shieh
- Args:
+ Parameters
+ ----------
+ model_name : str
+ Path to the pretrained model
+ Examples
+ --------
+ >>> kosmos = Kosmos()
+ >>> kosmos("Hello, my name is", "path/to/image.png")
- # Initialize Kosmos
- kosmos = Kosmos()
-
- # Perform multimodal grounding
- kosmos.multimodal_grounding("Find the red apple in the image.", "https://example.com/apple.jpg")
-
- # Perform referring expression comprehension
- kosmos.referring_expression_comprehension("Show me the green bottle.", "https://example.com/bottle.jpg")
-
- # Generate referring expressions
- kosmos.referring_expression_generation("It is on the table.", "https://example.com/table.jpg")
-
- # Perform grounded visual question answering
- kosmos.grounded_vqa("What is the color of the car?", "https://example.com/car.jpg")
-
- # Generate grounded image caption
- kosmos.grounded_image_captioning("https://example.com/beach.jpg")
"""
def __init__(
self,
model_name="ydshieh/kosmos-2-patch14-224",
+ *args,
+ **kwargs,
):
self.model = AutoModelForVision2Seq.from_pretrained(
- model_name, trust_remote_code=True
+ model_name, trust_remote_code=True, *args, **kwargs
)
self.processor = AutoProcessor.from_pretrained(
- model_name, trust_remote_code=True
+ model_name, trust_remote_code=True, *args, **kwargs
)
def get_image(self, url):
diff --git a/swarms/models/nougat.py b/swarms/models/nougat.py
index 82bb95f5..0eceb362 100644
--- a/swarms/models/nougat.py
+++ b/swarms/models/nougat.py
@@ -18,7 +18,7 @@ class Nougat:
"""
Nougat
- ArgsS:
+ Args:
model_name_or_path: str, default="facebook/nougat-base"
min_length: int, default=1
max_new_tokens: int, default=30
@@ -35,7 +35,7 @@ class Nougat:
self,
model_name_or_path="facebook/nougat-base",
min_length: int = 1,
- max_new_tokens: int = 30,
+ max_new_tokens: int = 5000,
):
self.model_name_or_path = model_name_or_path
self.min_length = min_length
@@ -50,14 +50,17 @@ class Nougat:
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.model.to(self.device)
- def get_image(self, img_path: str):
+ def get_image(self, img: str):
"""Get an image from a path"""
- image = Image.open(img_path)
- return image
+ img = Image.open(img)
- def __call__(self, img_path: str):
+ if img.mode == "L":
+ img = img.convert("RGB")
+ return img
+
+ def __call__(self, img: str):
"""Call the model with an image_path str as an input"""
- image = Image.open(img_path)
+ image = Image.open(img)
pixel_values = self.processor(image, return_tensors="pt").pixel_values
# Generate transcriptions, here we only generate 30 tokens
@@ -78,6 +81,7 @@ class Nougat:
return out
def clean_nougat_output(raw_output):
+ """Clean the output from nougat to be more readable"""
# Define the pattern to extract the relevant data
daily_balance_pattern = (
r"\*\*(\d{2}/\d{2}/\d{4})\*\*\n\n\*\*([\d,]+\.\d{2})\*\*"
diff --git a/swarms/models/whisperx.py b/swarms/models/whisperx_model.py
similarity index 84%
rename from swarms/models/whisperx.py
rename to swarms/models/whisperx_model.py
index 338971da..338db6e3 100644
--- a/swarms/models/whisperx.py
+++ b/swarms/models/whisperx_model.py
@@ -2,7 +2,7 @@ import os
import subprocess
try:
- import whisperx
+    import whisperx as whisperx_model  # third-party whisperx package, not this module
from pydub import AudioSegment
from pytube import YouTube
except Exception as error:
@@ -66,17 +66,17 @@ class WhisperX:
compute_type = "float16"
# 1. Transcribe with original Whisper (batched) ๐ฃ๏ธ
- model = whisperx.load_model(
+ model = whisperx_model.load_model(
"large-v2", device, compute_type=compute_type
)
- audio = whisperx.load_audio(audio_file)
+ audio = whisperx_model.load_audio(audio_file)
result = model.transcribe(audio, batch_size=batch_size)
# 2. Align Whisper output ๐
- model_a, metadata = whisperx.load_align_model(
+ model_a, metadata = whisperx_model.load_align_model(
language_code=result["language"], device=device
)
- result = whisperx.align(
+ result = whisperx_model.align(
result["segments"],
model_a,
metadata,
@@ -86,7 +86,7 @@ class WhisperX:
)
# 3. Assign speaker labels ๐ท๏ธ
- diarize_model = whisperx.DiarizationPipeline(
+ diarize_model = whisperx_model.DiarizationPipeline(
use_auth_token=self.hf_api_key, device=device
)
diarize_model(audio_file)
@@ -99,16 +99,18 @@ class WhisperX:
print("The key 'segments' is not found in the result.")
def transcribe(self, audio_file):
- model = whisperx.load_model("large-v2", self.device, self.compute_type)
- audio = whisperx.load_audio(audio_file)
+ model = whisperx_model.load_model(
+ "large-v2", self.device, self.compute_type
+ )
+ audio = whisperx_model.load_audio(audio_file)
result = model.transcribe(audio, batch_size=self.batch_size)
# 2. Align Whisper output ๐
- model_a, metadata = whisperx.load_align_model(
+ model_a, metadata = whisperx_model.load_align_model(
language_code=result["language"], device=self.device
)
- result = whisperx.align(
+ result = whisperx_model.align(
result["segments"],
model_a,
metadata,
@@ -118,7 +120,7 @@ class WhisperX:
)
# 3. Assign speaker labels ๐ท๏ธ
- diarize_model = whisperx.DiarizationPipeline(
+ diarize_model = whisperx_model.DiarizationPipeline(
use_auth_token=self.hf_api_key, device=self.device
)
diff --git a/swarms/prompts/autobloggen.py b/swarms/prompts/autobloggen.py
index 64001d1d..a6f9e561 100644
--- a/swarms/prompts/autobloggen.py
+++ b/swarms/prompts/autobloggen.py
@@ -1,4 +1,4 @@
-AUTOBLOG_GEN_GENERATOR = """
+TOPIC_GENERATOR_SYSTEM_PROMPT = """
First search for a list of topics on the web based their relevance to Positive Med's long term vision then rank than based on the goals this month, then output a single headline title for a blog for the next autonomous agent to write the blog, utilize the SOP below to help you strategically select topics. Output a single topic that will be the foundation for a blog.
diff --git a/swarms/structs/flow.py b/swarms/structs/flow.py
index 18a141a3..19d9a90e 100644
--- a/swarms/structs/flow.py
+++ b/swarms/structs/flow.py
@@ -9,9 +9,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
from termcolor import colored
+from swarms.tools.tool import BaseTool
from swarms.utils.code_interpreter import SubprocessCodeInterpreter
from swarms.utils.parse_code import extract_code_in_backticks_in_string
-from swarms.tools.tool import BaseTool
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+ MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
# System prompt
FLOW_SYSTEM_PROMPT = f"""
@@ -137,7 +140,7 @@ class Flow:
"""
Flow is the structure that provides autonomy to any llm in a reliable and effective fashion.
The flow structure is designed to be used with any llm and provides the following features:
-
+
Features:
* Interactive, AI generates, then user input
* Message history and performance history fed -> into context -> truncate if too long
@@ -154,7 +157,7 @@ class Flow:
retry_interval (int): The interval between retry attempts
interactive (bool): Whether or not to run in interactive mode
dashboard (bool): Whether or not to print the dashboard
- dynamic_temperature(bool): Dynamical temperature handling
+ dynamic_temperature_enabled(bool): Dynamical temperature handling
**kwargs (Any): Any additional keyword arguments
Methods:
@@ -182,7 +185,6 @@ class Flow:
add_message_to_memory_and_truncate: Add the message to the memory and truncate
print_dashboard: Print dashboard
activate_autonomous_agent: Print the autonomous agent activation message
- dynamic_temperature: Dynamically change the temperature
_check_stopping_condition: Check if the stopping condition is met
format_prompt: Format the prompt
get_llm_init_params: Get the llm init params
@@ -236,18 +238,20 @@ class Flow:
dynamic_loops: Optional[bool] = False,
interactive: bool = False,
dashboard: bool = False,
- agent_name: str = " Autonomous Agent XYZ1B",
+ agent_name: str = "Autonomous Agent XYZ1B",
agent_description: str = None,
system_prompt: str = FLOW_SYSTEM_PROMPT,
tools: List[BaseTool] = None,
- dynamic_temperature: bool = False,
- sop: str = None,
+ dynamic_temperature_enabled: Optional[bool] = False,
+ sop: Optional[str] = None,
+ sop_list: Optional[List[str]] = None,
saved_state_path: Optional[str] = "flow_state.json",
- autosave: bool = False,
- context_length: int = 8192,
+ autosave: Optional[bool] = False,
+ context_length: Optional[int] = 8192,
user_name: str = "Human:",
- self_healing: bool = False,
- code_interpreter: bool = False,
+ self_healing_enabled: Optional[bool] = False,
+ code_interpreter: Optional[bool] = False,
+ multi_modal: Optional[bool] = None,
**kwargs: Any,
):
self.llm = llm
@@ -257,22 +261,17 @@ class Flow:
self.loop_interval = loop_interval
self.retry_attempts = retry_attempts
self.retry_interval = retry_interval
- self.feedback = []
- self.memory = []
self.task = None
self.stopping_token = stopping_token # or ""
self.interactive = interactive
self.dashboard = dashboard
self.return_history = return_history
- self.dynamic_temperature = dynamic_temperature
+ self.dynamic_temperature_enabled = dynamic_temperature_enabled
self.dynamic_loops = dynamic_loops
self.user_name = user_name
self.context_length = context_length
- # SOPS to inject into the system prompt
self.sop = sop
- # The max_loops will be set dynamically if the dynamic_loop
- if self.dynamic_loops:
- self.max_loops = "auto"
+ self.sop_list = sop_list
self.tools = tools or []
self.system_prompt = system_prompt
self.agent_name = agent_name
@@ -280,8 +279,27 @@ class Flow:
self.saved_state_path = saved_state_path
self.autosave = autosave
self.response_filters = []
- self.self_healing = self_healing
+ self.self_healing_enabled = self_healing_enabled
self.code_interpreter = code_interpreter
+ self.multi_modal = multi_modal
+
+ # The max_loops will be set dynamically if the dynamic_loop
+ if self.dynamic_loops:
+ self.max_loops = "auto"
+
+ # If multimodal = yes then set the sop to the multimodal sop
+ if self.multi_modal:
+ self.sop = MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1
+
+ # If the user inputs a list of strings for the sop then join them and set the sop
+ if self.sop_list:
+ self.sop = "\n".join(self.sop_list)
+
+ # Memory
+ self.feedback = []
+ self.memory = []
+
+ # Initialize the code executor
self.code_executor = SubprocessCodeInterpreter()
def provide_feedback(self, feedback: str) -> None:
@@ -461,7 +479,7 @@ class Flow:
Retry Interval: {self.retry_interval}
Interactive: {self.interactive}
Dashboard: {self.dashboard}
- Dynamic Temperature: {self.dynamic_temperature}
+ Dynamic Temperature: {self.dynamic_temperature_enabled}
Autosave: {self.autosave}
Saved State: {self.saved_state_path}
Model Configuration: {model_config}
@@ -498,7 +516,7 @@ class Flow:
)
print(error)
- def run(self, task: str, **kwargs):
+ def run(self, task: Optional[str], img: Optional[str] = None, **kwargs):
"""
Run the autonomous agent loop
@@ -528,7 +546,10 @@ class Flow:
self.print_dashboard(task)
loop_count = 0
+
+ # While the max_loops is auto or the loop count is less than the max_loops
while self.max_loops == "auto" or loop_count < self.max_loops:
+ # Loop count
loop_count += 1
print(
colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")
@@ -543,7 +564,7 @@ class Flow:
break
# Adjust temperature, comment if no work
- if self.dynamic_temperature:
+ if self.dynamic_temperature_enabled:
self.dynamic_temperature()
# Preparing the prompt
@@ -552,10 +573,17 @@ class Flow:
attempt = 0
while attempt < self.retry_attempts:
try:
- response = self.llm(
- task,
- **kwargs,
- )
+ if img:
+ response = self.llm(
+ task,
+ img,
+ **kwargs,
+ )
+ else:
+ response = self.llm(
+ task,
+ **kwargs,
+ )
# If code interpreter is enabled then run the code
if self.code_interpreter:
@@ -642,7 +670,7 @@ class Flow:
break
# Adjust temperature, comment if no work
- if self.dynamic_temperature:
+ if self.dynamic_temperature_enabled:
self.dynamic_temperature()
# Preparing the prompt
@@ -987,7 +1015,7 @@ class Flow:
"retry_interval": self.retry_interval,
"interactive": self.interactive,
"dashboard": self.dashboard,
- "dynamic_temperature": self.dynamic_temperature,
+ "dynamic_temperature": self.dynamic_temperature_enabled,
}
with open(file_path, "w") as f:
diff --git a/swarms/structs/sequential_workflow.py b/swarms/structs/sequential_workflow.py
index 753ada15..0f99a247 100644
--- a/swarms/structs/sequential_workflow.py
+++ b/swarms/structs/sequential_workflow.py
@@ -29,6 +29,18 @@ class Task:
Task class for running a task in a sequential workflow.
+ Args:
+ description (str): The description of the task.
+ flow (Union[Callable, Flow]): The model or flow to execute the task.
+ args (List[Any]): Additional arguments to pass to the task execution.
+ kwargs (Dict[str, Any]): Additional keyword arguments to pass to the task execution.
+ result (Any): The result of the task execution.
+ history (List[Any]): The history of the task execution.
+
+ Methods:
+ execute: Execute the task.
+
+
Examples:
>>> from swarms.structs import Task, Flow
>>> from swarms.models import OpenAIChat
@@ -37,8 +49,6 @@ class Task:
>>> task.execute()
>>> task.result
-
-
"""
description: str
@@ -54,9 +64,6 @@ class Task:
Raises:
ValueError: If a Flow instance is used as a task and the 'task' argument is not provided.
-
-
-
"""
if isinstance(self.flow, Flow):
# Add a prompt to notify the Flow of the sequential workflow
@@ -114,14 +121,20 @@ class SequentialWorkflow:
dashboard: bool = False
def add(
- self, task: str, flow: Union[Callable, Flow], *args, **kwargs
+ self,
+ flow: Union[Callable, Flow],
+ task: Optional[str] = None,
+ img: Optional[str] = None,
+ *args,
+ **kwargs,
) -> None:
"""
Add a task to the workflow.
Args:
- task (str): The task description or the initial input for the Flow.
flow (Union[Callable, Flow]): The model or flow to execute the task.
+ task (str): The task description or the initial input for the Flow.
+ img (str): The image to understand for the task.
*args: Additional arguments to pass to the task execution.
**kwargs: Additional keyword arguments to pass to the task execution.
"""
@@ -130,9 +143,22 @@ class SequentialWorkflow:
kwargs["task"] = task # Set the task as a keyword argument for Flow
# Append the task to the tasks list
- self.tasks.append(
- Task(description=task, flow=flow, args=list(args), kwargs=kwargs)
- )
+        # Test the `img` argument of this call, not an attribute on self.
+        # NOTE(review): confirm Task declares an `img` field.
+        if img:
+            self.tasks.append(
+                Task(
+                    description=task,
+                    flow=flow,
+                    args=list(args),
+                    kwargs=kwargs,
+                    img=img,
+                )
+            )
+        else:
+            self.tasks.append(
+                Task(description=task, flow=flow, args=list(args), kwargs=kwargs)
+            )
def reset_workflow(self) -> None:
"""Resets the workflow by clearing the results of each task."""
@@ -148,18 +174,16 @@ class SequentialWorkflow:
"""
return {task.description: task.result for task in self.tasks}
- def remove_task(self, task_description: str) -> None:
+ def remove_task(self, task: str) -> None:
"""Remove tasks from sequential workflow"""
- self.tasks = [
- task for task in self.tasks if task.description != task_description
- ]
+ self.tasks = [task for task in self.tasks if task.description != task]
- def update_task(self, task_description: str, **updates) -> None:
+ def update_task(self, task: str, **updates) -> None:
"""
Updates the arguments of a task in the workflow.
Args:
- task_description (str): The description of the task to update.
+ task (str): The description of the task to update.
**updates: The updates to apply to the task.
Raises:
@@ -178,11 +202,11 @@ class SequentialWorkflow:
"""
for task in self.tasks:
- if task.description == task_description:
+ if task.description == task:
task.kwargs.update(updates)
break
else:
- raise ValueError(f"Task {task_description} not found in workflow.")
+ raise ValueError(f"Task {task} not found in workflow.")
def save_workflow_state(
self,
@@ -272,6 +296,7 @@ class SequentialWorkflow:
)
def workflow_shutdown(self, **kwargs) -> None:
+ """Shuts down the workflow."""
print(
colored(
"""
@@ -282,6 +307,7 @@ class SequentialWorkflow:
)
def add_objective_to_workflow(self, task: str, **kwargs) -> None:
+ """Adds an objective to the workflow."""
print(
colored(
"""
diff --git a/swarms/tools/tool.py b/swarms/tools/tool.py
index 8ae3b7cd..105a2541 100644
--- a/swarms/tools/tool.py
+++ b/swarms/tools/tool.py
@@ -118,7 +118,7 @@ class ToolException(Exception):
class BaseTool(RunnableSerializable[Union[str, Dict], Any]):
- """Interface LangChain tools must implement."""
+ """Interface swarms tools must implement."""
def __init_subclass__(cls, **kwargs: Any) -> None:
"""Create the definition of the new tool class."""
diff --git a/swarms/utils/disable_logging.py b/swarms/utils/disable_logging.py
new file mode 100644
index 00000000..d1c7df9b
--- /dev/null
+++ b/swarms/utils/disable_logging.py
@@ -0,0 +1,30 @@
+import logging
+import os
+import warnings
+
+
+def disable_logging():
+ warnings.filterwarnings("ignore", category=UserWarning)
+
+ # disable tensorflow warnings
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+ # Set the logging level for the entire module
+ logging.basicConfig(level=logging.WARNING)
+
+ try:
+ log = logging.getLogger("pytorch")
+ log.propagate = False
+ log.setLevel(logging.ERROR)
+ except Exception as error:
+ print(f"Pytorch logging not disabled: {error}")
+
+ for logger_name in [
+ "tensorflow",
+ "h5py",
+ "numexpr",
+ "git",
+ "wandb.docker.auth",
+ ]:
+ logger = logging.getLogger(logger_name)
+        logger.setLevel(logging.WARNING)  # Suppress DEBUG and info logs
diff --git a/tests/Dockerfile b/tests/Dockerfile
index b36c8d25..f6e46515 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -21,7 +21,6 @@ RUN pip install poetry
# Disable virtualenv creation by poetry and install dependencies
RUN poetry config virtualenvs.create false
-RUN poetry install --no-interaction --no-ansi
# Install the 'swarms' package if it's not included in the poetry.lock
RUN pip install swarms
@@ -30,4 +29,4 @@ RUN pip install swarms
RUN pip install pytest
# Run pytest on all tests in the tests directory
-CMD find ./tests -name '*.py' -exec pytest {} +
+CMD pytest
diff --git a/tests/agents/idea_to_image.py b/tests/agents/test_idea_to_image.py
similarity index 100%
rename from tests/agents/idea_to_image.py
rename to tests/agents/test_idea_to_image.py
diff --git a/tests/agents/omni_modal.py b/tests/agents/test_omni_modal.py
similarity index 100%
rename from tests/agents/omni_modal.py
rename to tests/agents/test_omni_modal.py
diff --git a/tests/embeddings/pegasus.py b/tests/embeddings/test_pegasus.py
similarity index 100%
rename from tests/embeddings/pegasus.py
rename to tests/embeddings/test_pegasus.py
diff --git a/tests/memory/main.py b/tests/memory/test_main.py
similarity index 100%
rename from tests/memory/main.py
rename to tests/memory/test_main.py
diff --git a/tests/memory/oceandb.py b/tests/memory/test_oceandb.py
similarity index 100%
rename from tests/memory/oceandb.py
rename to tests/memory/test_oceandb.py
diff --git a/tests/memory/pg.py b/tests/memory/test_pg.py
similarity index 100%
rename from tests/memory/pg.py
rename to tests/memory/test_pg.py
diff --git a/tests/memory/pinecone.py b/tests/memory/test_pinecone.py
similarity index 100%
rename from tests/memory/pinecone.py
rename to tests/memory/test_pinecone.py
diff --git a/tests/models/revgptv4.py b/tests/models/revgptv4.py
deleted file mode 100644
index 7a40ab30..00000000
--- a/tests/models/revgptv4.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import unittest
-from unittest.mock import patch
-from RevChatGPTModelv4 import RevChatGPTModelv4
-
-
-class TestRevChatGPT(unittest.TestCase):
- def setUp(self):
- self.access_token = "123"
- self.model = RevChatGPTModelv4(access_token=self.access_token)
-
- def test_run(self):
- prompt = "What is the capital of France?"
- self.model.start_time = 10
- self.model.end_time = 20
- response = self.model.run(prompt)
- self.assertEqual(response, "The capital of France is Paris.")
- self.assertEqual(self.model.start_time, 10)
- self.assertEqual(self.model.end_time, 20)
-
- def test_generate_summary(self):
- text = "Hello world. This is some text. It has multiple sentences."
- summary = self.model.generate_summary(text)
- self.assertEqual(summary, "")
-
- @patch("RevChatGPTModelv4.Chatbot.install_plugin")
- def test_enable_plugin(self, mock_install_plugin):
- plugin_id = "plugin123"
- self.model.enable_plugin(plugin_id)
- mock_install_plugin.assert_called_with(plugin_id=plugin_id)
-
- @patch("RevChatGPTModelv4.Chatbot.get_plugins")
- def test_list_plugins(self, mock_get_plugins):
- mock_get_plugins.return_value = [{"id": "123", "name": "Test Plugin"}]
- plugins = self.model.list_plugins()
- self.assertEqual(len(plugins), 1)
- self.assertEqual(plugins[0]["id"], "123")
- self.assertEqual(plugins[0]["name"], "Test Plugin")
-
- @patch("RevChatGPTModelv4.Chatbot.get_conversations")
- def test_get_conversations(self, mock_get_conversations):
- self.model.chatbot.get_conversations()
- mock_get_conversations.assert_called()
-
- @patch("RevChatGPTModelv4.Chatbot.get_msg_history")
- def test_get_msg_history(self, mock_get_msg_history):
- convo_id = "123"
- self.model.chatbot.get_msg_history(convo_id)
- mock_get_msg_history.assert_called_with(convo_id)
-
- @patch("RevChatGPTModelv4.Chatbot.share_conversation")
- def test_share_conversation(self, mock_share_conversation):
- self.model.chatbot.share_conversation()
- mock_share_conversation.assert_called()
-
- @patch("RevChatGPTModelv4.Chatbot.gen_title")
- def test_gen_title(self, mock_gen_title):
- convo_id = "123"
- message_id = "456"
- self.model.chatbot.gen_title(convo_id, message_id)
- mock_gen_title.assert_called_with(convo_id, message_id)
-
- @patch("RevChatGPTModelv4.Chatbot.change_title")
- def test_change_title(self, mock_change_title):
- convo_id = "123"
- title = "New Title"
- self.model.chatbot.change_title(convo_id, title)
- mock_change_title.assert_called_with(convo_id, title)
-
- @patch("RevChatGPTModelv4.Chatbot.delete_conversation")
- def test_delete_conversation(self, mock_delete_conversation):
- convo_id = "123"
- self.model.chatbot.delete_conversation(convo_id)
- mock_delete_conversation.assert_called_with(convo_id)
-
- @patch("RevChatGPTModelv4.Chatbot.clear_conversations")
- def test_clear_conversations(self, mock_clear_conversations):
- self.model.chatbot.clear_conversations()
- mock_clear_conversations.assert_called()
-
- @patch("RevChatGPTModelv4.Chatbot.rollback_conversation")
- def test_rollback_conversation(self, mock_rollback_conversation):
- num = 2
- self.model.chatbot.rollback_conversation(num)
- mock_rollback_conversation.assert_called_with(num)
-
- @patch("RevChatGPTModelv4.Chatbot.reset_chat")
- def test_reset_chat(self, mock_reset_chat):
- self.model.chatbot.reset_chat()
- mock_reset_chat.assert_called()
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/tests/models/LLM.py b/tests/models/test_LLM.py
similarity index 100%
rename from tests/models/LLM.py
rename to tests/models/test_LLM.py
diff --git a/tests/models/ada.py b/tests/models/test_ada.py
similarity index 100%
rename from tests/models/ada.py
rename to tests/models/test_ada.py
diff --git a/tests/models/anthropic.py b/tests/models/test_anthropic.py
similarity index 100%
rename from tests/models/anthropic.py
rename to tests/models/test_anthropic.py
diff --git a/tests/models/auto_temp.py b/tests/models/test_auto_temp.py
similarity index 100%
rename from tests/models/auto_temp.py
rename to tests/models/test_auto_temp.py
diff --git a/tests/models/bingchat.py b/tests/models/test_bingchat.py
similarity index 100%
rename from tests/models/bingchat.py
rename to tests/models/test_bingchat.py
diff --git a/tests/models/bioclip.py b/tests/models/test_bioclip.py
similarity index 100%
rename from tests/models/bioclip.py
rename to tests/models/test_bioclip.py
diff --git a/tests/models/biogpt.py b/tests/models/test_biogpt.py
similarity index 100%
rename from tests/models/biogpt.py
rename to tests/models/test_biogpt.py
diff --git a/tests/models/cohere.py b/tests/models/test_cohere.py
similarity index 100%
rename from tests/models/cohere.py
rename to tests/models/test_cohere.py
diff --git a/tests/models/dalle3.py b/tests/models/test_dalle3.py
similarity index 100%
rename from tests/models/dalle3.py
rename to tests/models/test_dalle3.py
diff --git a/tests/models/distill_whisper.py b/tests/models/test_distill_whisper.py
similarity index 100%
rename from tests/models/distill_whisper.py
rename to tests/models/test_distill_whisper.py
diff --git a/tests/models/distilled_whisperx.py b/tests/models/test_distilled_whisperx.py
similarity index 100%
rename from tests/models/distilled_whisperx.py
rename to tests/models/test_distilled_whisperx.py
diff --git a/tests/models/elevenlab.py b/tests/models/test_elevenlab.py
similarity index 100%
rename from tests/models/elevenlab.py
rename to tests/models/test_elevenlab.py
diff --git a/tests/models/fuyu.py b/tests/models/test_fuyu.py
similarity index 100%
rename from tests/models/fuyu.py
rename to tests/models/test_fuyu.py
diff --git a/tests/models/test_gpt4_vision_api.py b/tests/models/test_gpt4_vision_api.py
new file mode 100644
index 00000000..bca3b5f6
--- /dev/null
+++ b/tests/models/test_gpt4_vision_api.py
@@ -0,0 +1,238 @@
+import asyncio
+import os
+from unittest.mock import AsyncMock, Mock, mock_open, patch
+from aiohttp import ClientResponseError
+import pytest
+from dotenv import load_dotenv
+from requests.exceptions import RequestException
+
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
+
+load_dotenv()
+
+
+custom_api_key = os.environ.get("OPENAI_API_KEY")
+img = "images/swarms.jpeg"
+
+
+@pytest.fixture
+def vision_api():
+ return GPT4VisionAPI(openai_api_key="test_api_key")
+
+
+def test_init(vision_api):
+ assert vision_api.openai_api_key == "test_api_key"
+
+
+def test_encode_image(vision_api):
+ with patch(
+ "builtins.open", mock_open(read_data=b"test_image_data"), create=True
+ ):
+ encoded_image = vision_api.encode_image(img)
+ assert encoded_image == "dGVzdF9pbWFnZV9kYXRh"
+
+
+def test_run_success(vision_api):
+ expected_response = {"choices": [{"text": "This is the model's response."}]}
+ with patch(
+ "requests.post", return_value=Mock(json=lambda: expected_response)
+ ) as mock_post:
+ result = vision_api.run("What is this?", img)
+ mock_post.assert_called_once()
+ assert result == "This is the model's response."
+
+
+def test_run_request_error(vision_api):
+ with patch(
+ "requests.post", side_effect=RequestException("Request Error")
+ ) as mock_post:
+ with pytest.raises(RequestException):
+ vision_api.run("What is this?", img)
+
+
+def test_run_response_error(vision_api):
+ expected_response = {"error": "Model Error"}
+ with patch(
+ "requests.post", return_value=Mock(json=lambda: expected_response)
+ ) as mock_post:
+ with pytest.raises(RuntimeError):
+ vision_api.run("What is this?", img)
+
+
+def test_call(vision_api):
+ expected_response = {"choices": [{"text": "This is the model's response."}]}
+ with patch(
+ "requests.post", return_value=Mock(json=lambda: expected_response)
+ ) as mock_post:
+ result = vision_api("What is this?", img)
+ mock_post.assert_called_once()
+ assert result == "This is the model's response."
+
+
+@pytest.fixture
+def gpt_api():
+ return GPT4VisionAPI()
+
+
+def test_initialization_with_default_key():
+ api = GPT4VisionAPI()
+ assert api.openai_api_key == custom_api_key
+
+
+def test_initialization_with_custom_key():
+ custom_key = custom_api_key
+ api = GPT4VisionAPI(openai_api_key=custom_key)
+ assert api.openai_api_key == custom_key
+
+
+def test_run_successful_response(gpt_api):
+ task = "What is in the image?"
+ img_url = img
+ response_json = {"choices": [{"text": "Answer from GPT-4 Vision"}]}
+ mock_response = Mock()
+ mock_response.json.return_value = response_json
+ with patch("requests.post", return_value=mock_response) as mock_post:
+ result = gpt_api.run(task, img_url)
+ mock_post.assert_called_once()
+ assert result == response_json["choices"][0]["text"]
+
+
+def test_run_with_exception(gpt_api):
+ task = "What is in the image?"
+ img_url = img
+ with patch("requests.post", side_effect=Exception("Test Exception")):
+ with pytest.raises(Exception):
+ gpt_api.run(task, img_url)
+
+
+def test_call_method_successful_response(gpt_api):
+ task = "What is in the image?"
+ img_url = img
+ response_json = {"choices": [{"text": "Answer from GPT-4 Vision"}]}
+ mock_response = Mock()
+ mock_response.json.return_value = response_json
+ with patch("requests.post", return_value=mock_response) as mock_post:
+ result = gpt_api(task, img_url)
+ mock_post.assert_called_once()
+ assert result == response_json
+
+
+def test_call_method_with_exception(gpt_api):
+ task = "What is in the image?"
+ img_url = img
+ with patch("requests.post", side_effect=Exception("Test Exception")):
+ with pytest.raises(Exception):
+ gpt_api(task, img_url)
+
+
+@pytest.mark.asyncio
+async def test_arun_success(vision_api):
+ expected_response = {
+ "choices": [{"message": {"content": "This is the model's response."}}]
+ }
+ with patch(
+ "aiohttp.ClientSession.post",
+ new_callable=AsyncMock,
+ return_value=AsyncMock(json=AsyncMock(return_value=expected_response)),
+ ) as mock_post:
+ result = await vision_api.arun("What is this?", img)
+ mock_post.assert_called_once()
+ assert result == "This is the model's response."
+
+
+@pytest.mark.asyncio
+async def test_arun_request_error(vision_api):
+ with patch(
+ "aiohttp.ClientSession.post",
+ new_callable=AsyncMock,
+ side_effect=Exception("Request Error"),
+ ) as mock_post:
+ with pytest.raises(Exception):
+ await vision_api.arun("What is this?", img)
+
+
+def test_run_many_success(vision_api):
+ expected_response = {
+ "choices": [{"message": {"content": "This is the model's response."}}]
+ }
+ with patch(
+ "requests.post", return_value=Mock(json=lambda: expected_response)
+ ) as mock_post:
+ tasks = ["What is this?", "What is that?"]
+ imgs = [img, img]
+ results = vision_api.run_many(tasks, imgs)
+ assert mock_post.call_count == 2
+ assert results == [
+ "This is the model's response.",
+ "This is the model's response.",
+ ]
+
+
+def test_run_many_request_error(vision_api):
+ with patch(
+ "requests.post", side_effect=RequestException("Request Error")
+ ) as mock_post:
+ tasks = ["What is this?", "What is that?"]
+ imgs = [img, img]
+ with pytest.raises(RequestException):
+ vision_api.run_many(tasks, imgs)
+
+
+@pytest.mark.asyncio
+async def test_arun_json_decode_error(vision_api):
+ with patch(
+ "aiohttp.ClientSession.post",
+ new_callable=AsyncMock,
+ return_value=AsyncMock(json=AsyncMock(side_effect=ValueError)),
+ ) as mock_post:
+ with pytest.raises(ValueError):
+ await vision_api.arun("What is this?", img)
+
+
+@pytest.mark.asyncio
+async def test_arun_api_error(vision_api):
+ error_response = {"error": {"message": "API Error"}}
+ with patch(
+ "aiohttp.ClientSession.post",
+ new_callable=AsyncMock,
+ return_value=AsyncMock(json=AsyncMock(return_value=error_response)),
+ ) as mock_post:
+ with pytest.raises(Exception, match="API Error"):
+ await vision_api.arun("What is this?", img)
+
+
+@pytest.mark.asyncio
+async def test_arun_unexpected_response(vision_api):
+ unexpected_response = {"unexpected": "response"}
+ with patch(
+ "aiohttp.ClientSession.post",
+ new_callable=AsyncMock,
+ return_value=AsyncMock(
+ json=AsyncMock(return_value=unexpected_response)
+ ),
+ ) as mock_post:
+ with pytest.raises(Exception, match="Unexpected response"):
+ await vision_api.arun("What is this?", img)
+
+
+@pytest.mark.asyncio
+async def test_arun_retries(vision_api):
+ with patch(
+ "aiohttp.ClientSession.post",
+ new_callable=AsyncMock,
+ side_effect=ClientResponseError(None, None),
+ ) as mock_post:
+ with pytest.raises(ClientResponseError):
+ await vision_api.arun("What is this?", img)
+ assert mock_post.call_count == vision_api.retries + 1
+
+
+@pytest.mark.asyncio
+async def test_arun_timeout(vision_api):
+ with patch(
+ "aiohttp.ClientSession.post",
+ new_callable=AsyncMock,
+ side_effect=asyncio.TimeoutError,
+ ) as mock_post:
+ with pytest.raises(asyncio.TimeoutError):
+ await vision_api.arun("What is this?", img)
diff --git a/tests/models/gpt4v.py b/tests/models/test_gpt4v.py
similarity index 100%
rename from tests/models/gpt4v.py
rename to tests/models/test_gpt4v.py
diff --git a/tests/models/hf.py b/tests/models/test_hf.py
similarity index 100%
rename from tests/models/hf.py
rename to tests/models/test_hf.py
diff --git a/tests/models/huggingface.py b/tests/models/test_huggingface.py
similarity index 100%
rename from tests/models/huggingface.py
rename to tests/models/test_huggingface.py
diff --git a/tests/models/idefics.py b/tests/models/test_idefics.py
similarity index 100%
rename from tests/models/idefics.py
rename to tests/models/test_idefics.py
diff --git a/tests/models/jina_embeds.py b/tests/models/test_jina_embeds.py
similarity index 100%
rename from tests/models/jina_embeds.py
rename to tests/models/test_jina_embeds.py
diff --git a/tests/models/kosmos.py b/tests/models/test_kosmos.py
similarity index 100%
rename from tests/models/kosmos.py
rename to tests/models/test_kosmos.py
diff --git a/tests/models/kosmos2.py b/tests/models/test_kosmos2.py
similarity index 100%
rename from tests/models/kosmos2.py
rename to tests/models/test_kosmos2.py
diff --git a/tests/models/llama_function_caller.py b/tests/models/test_llama_function_caller.py
similarity index 100%
rename from tests/models/llama_function_caller.py
rename to tests/models/test_llama_function_caller.py
diff --git a/tests/models/mistral.py b/tests/models/test_mistral.py
similarity index 100%
rename from tests/models/mistral.py
rename to tests/models/test_mistral.py
diff --git a/tests/models/mpt7b.py b/tests/models/test_mpt7b.py
similarity index 100%
rename from tests/models/mpt7b.py
rename to tests/models/test_mpt7b.py
diff --git a/tests/models/nougat.py b/tests/models/test_nougat.py
similarity index 100%
rename from tests/models/nougat.py
rename to tests/models/test_nougat.py
diff --git a/tests/models/revgptv1.py b/tests/models/test_revgptv1.py
similarity index 100%
rename from tests/models/revgptv1.py
rename to tests/models/test_revgptv1.py
diff --git a/tests/models/speech_t5.py b/tests/models/test_speech_t5.py
similarity index 100%
rename from tests/models/speech_t5.py
rename to tests/models/test_speech_t5.py
diff --git a/tests/models/ssd_1b.py b/tests/models/test_ssd_1b.py
similarity index 100%
rename from tests/models/ssd_1b.py
rename to tests/models/test_ssd_1b.py
diff --git a/tests/models/timm_model.py b/tests/models/test_timm_model.py
similarity index 100%
rename from tests/models/timm_model.py
rename to tests/models/test_timm_model.py
diff --git a/tests/models/vilt.py b/tests/models/test_vilt.py
similarity index 100%
rename from tests/models/vilt.py
rename to tests/models/test_vilt.py
diff --git a/tests/models/whisperx.py b/tests/models/test_whisperx.py
similarity index 99%
rename from tests/models/whisperx.py
rename to tests/models/test_whisperx.py
index 5fad3431..ed671cb2 100644
--- a/tests/models/whisperx.py
+++ b/tests/models/test_whisperx.py
@@ -7,7 +7,7 @@ import pytest
import whisperx
from pydub import AudioSegment
from pytube import YouTube
-from swarms.models.whisperx import WhisperX
+from swarms.models.whisperx_model import WhisperX
# Fixture to create a temporary directory for testing
diff --git a/tests/models/yi_200k.py b/tests/models/test_yi_200k.py
similarity index 100%
rename from tests/models/yi_200k.py
rename to tests/models/test_yi_200k.py
diff --git a/tests/structs/flow.py b/tests/structs/test_flow.py
similarity index 100%
rename from tests/structs/flow.py
rename to tests/structs/test_flow.py
diff --git a/tests/structs/nonlinear_workflow.py b/tests/structs/test_nonlinear_workflow.py
similarity index 100%
rename from tests/structs/nonlinear_workflow.py
rename to tests/structs/test_nonlinear_workflow.py
diff --git a/tests/structs/sequential_workflow.py b/tests/structs/test_sequential_workflow.py
similarity index 100%
rename from tests/structs/sequential_workflow.py
rename to tests/structs/test_sequential_workflow.py
diff --git a/tests/structs/workflow.py b/tests/structs/test_workflow.py
similarity index 100%
rename from tests/structs/workflow.py
rename to tests/structs/test_workflow.py
diff --git a/tests/swarms/autoscaler.py b/tests/swarms/test_autoscaler.py
similarity index 100%
rename from tests/swarms/autoscaler.py
rename to tests/swarms/test_autoscaler.py
diff --git a/tests/swarms/dialogue_simulator.py b/tests/swarms/test_dialogue_simulator.py
similarity index 100%
rename from tests/swarms/dialogue_simulator.py
rename to tests/swarms/test_dialogue_simulator.py
diff --git a/tests/swarms/godmode.py b/tests/swarms/test_godmode.py
similarity index 100%
rename from tests/swarms/godmode.py
rename to tests/swarms/test_godmode.py
diff --git a/tests/swarms/groupchat.py b/tests/swarms/test_groupchat.py
similarity index 100%
rename from tests/swarms/groupchat.py
rename to tests/swarms/test_groupchat.py
diff --git a/tests/swarms/multi_agent_collab.py b/tests/swarms/test_multi_agent_collab.py
similarity index 100%
rename from tests/swarms/multi_agent_collab.py
rename to tests/swarms/test_multi_agent_collab.py
diff --git a/tests/swarms/multi_agent_debate.py b/tests/swarms/test_multi_agent_debate.py
similarity index 100%
rename from tests/swarms/multi_agent_debate.py
rename to tests/swarms/test_multi_agent_debate.py
diff --git a/tests/swarms/orchestrate.py b/tests/swarms/test_orchestrate.py
similarity index 100%
rename from tests/swarms/orchestrate.py
rename to tests/swarms/test_orchestrate.py
diff --git a/tests/swarms/simple_swarm.py b/tests/swarms/test_simple_swarm.py
similarity index 100%
rename from tests/swarms/simple_swarm.py
rename to tests/swarms/test_simple_swarm.py
diff --git a/tests/tools/base.py b/tests/tools/test_base.py
similarity index 100%
rename from tests/tools/base.py
rename to tests/tools/test_base.py
diff --git a/tests/utils/subprocess_code_interpreter.py b/tests/utils/test_subprocess_code_interpreter.py
similarity index 100%
rename from tests/utils/subprocess_code_interpreter.py
rename to tests/utils/test_subprocess_code_interpreter.py