Merge pull request #1 from kyegomez/master

catch up 20231126 am
pull/207/head
evelynmitchell 1 year ago committed by GitHub
commit aa88b11d3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -5,7 +5,7 @@ AI21_API_KEY="your_api_key_here"
COHERE_API_KEY="your_api_key_here" COHERE_API_KEY="your_api_key_here"
ALEPHALPHA_API_KEY="your_api_key_here" ALEPHALPHA_API_KEY="your_api_key_here"
HUGGINFACEHUB_API_KEY="your_api_key_here" HUGGINFACEHUB_API_KEY="your_api_key_here"
STABILITY_API_KEY="your_api_key_here"
WOLFRAM_ALPHA_APPID="your_wolfram_alpha_appid_here" WOLFRAM_ALPHA_APPID="your_wolfram_alpha_appid_here"
ZAPIER_NLA_API_KEY="your_zapier_nla_api_key_here" ZAPIER_NLA_API_KEY="your_zapier_nla_api_key_here"

@ -19,9 +19,9 @@ jobs:
python-version: ["3.7", "3.9", "3.10", "3.11"] python-version: ["3.7", "3.9", "3.10", "3.11"]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3 uses: actions/setup-python@v4
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install dependencies - name: Install dependencies

@ -1,4 +1,3 @@
---
version: 2 version: 2
build: build:

@ -34,28 +34,35 @@ Run example in Collab: <a target="_blank" href="https://colab.research.google.co
```python ```python
import os
from dotenv import load_dotenv
# Import the OpenAIChat model and the Flow struct
from swarms.models import OpenAIChat from swarms.models import OpenAIChat
from swarms.structs import Flow from swarms.structs import Flow
api_key = "" # Load the environment variables
load_dotenv()
# Initialize the language model, this model can be swapped out with Anthropic, ETC, Huggingface Models like Mistral, ETC # Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")
# Initialize the language model
llm = OpenAIChat( llm = OpenAIChat(
openai_api_key=api_key,
temperature=0.5, temperature=0.5,
openai_api_key=api_key,
) )
## Initialize the workflow
flow = Flow(
llm=llm,
max_loops=2,
dashboard=True,
) ## Initialize the workflow
flow = Flow(llm=llm, max_loops=1, dashboard=True)
# Run the workflow on a task
out = flow.run("Generate a 10,000 word blog on health and wellness.") out = flow.run("Generate a 10,000 word blog on health and wellness.")
``` ```
------ ------
@ -125,24 +132,32 @@ for task in workflow.tasks:
```python ```python
from swarms.structs import Flow from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
# Initialize the llm
llm = GPT4VisionAPI() llm = GPT4VisionAPI()
task = "Analyze this image of an assembly line and identify any issues such as misaligned parts, defects, or deviations from the standard assembly process. IF there is anything unsafe in the image, explain why it is unsafe and how it could be improved." task = (
"Analyze this image of an assembly line and identify any issues such as"
" misaligned parts, defects, or deviations from the standard assembly"
" process. IF there is anything unsafe in the image, explain why it is"
" unsafe and how it could be improved."
)
img = "assembly_line.jpg" img = "assembly_line.jpg"
## Initialize the workflow ## Initialize the workflow
flow = Flow( flow = Flow(
llm=llm, llm=llm,
max_loops=1, max_loops='auto'
sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
dashboard=True, dashboard=True,
) )
# Run the flow
flow.run(task=task, img=img) flow.run(task=task, img=img)
``` ```
--- ---

@ -1,9 +1,21 @@
import os
from dotenv import load_dotenv
# Import the OpenAIChat model and the Flow struct
from swarms.models import OpenAIChat from swarms.models import OpenAIChat
from swarms.structs import Flow from swarms.structs import Flow
# Load the environment variables
load_dotenv()
# Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")
# Initialize the language model # Initialize the language model
llm = OpenAIChat( llm = OpenAIChat(
temperature=0.5, temperature=0.5,
openai_api_key=api_key,
) )

@ -1,4 +1,3 @@
---
site_name: Swarms Docs site_name: Swarms Docs
plugins: plugins:
- glightbox - glightbox
@ -7,9 +6,6 @@ copyright: "&copy; APAC Corp, Inc."
extra_css: extra_css:
- docs/assets/css/extra.css - docs/assets/css/extra.css
extra: extra:
# analytics:
# provider: google
# property: G-QM8EDPSCB6
social: social:
- icon: fontawesome/solid/house - icon: fontawesome/solid/house
link: assets/img/SwarmsLogoIcon.png link: assets/img/SwarmsLogoIcon.png
@ -105,9 +101,6 @@ nav:
- swarms.memory: - swarms.memory:
- PineconeVectorStoreStore: "swarms/memory/pinecone.md" - PineconeVectorStoreStore: "swarms/memory/pinecone.md"
- PGVectorStore: "swarms/memory/pg.md" - PGVectorStore: "swarms/memory/pg.md"
# - swarms.chunkers:
# - BaseChunker: "swarms/chunkers/basechunker.md"
# - PdfChunker: "swarms/chunkers/pdf_chunker.md"
- Guides: - Guides:
- Overview: "examples/index.md" - Overview: "examples/index.md"
- Agents: - Agents:

@ -1,5 +1,8 @@
from swarms.structs import Flow from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
llm = GPT4VisionAPI() llm = GPT4VisionAPI()
@ -10,6 +13,7 @@ img = "images/swarms.jpeg"
## Initialize the workflow ## Initialize the workflow
flow = Flow( flow = Flow(
llm=llm, llm=llm,
sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
max_loops="auto", max_loops="auto",
) )

@ -1,6 +1,8 @@
from swarms.structs import Flow from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
llm = GPT4VisionAPI() llm = GPT4VisionAPI()

@ -0,0 +1,7 @@
"""
Idea 2 img
task -> gpt4 text -> dalle3 img -> gpt4vision img + text analyze img -> dalle3 img -> loop
"""
from swarms.models.gpt4_vision_api import GPT4VisionAPI

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

@ -0,0 +1,127 @@
"""
Swarm of multi modal autonomous agents for manufacturing!
---------------------------------------------------------
Health Security agent: Agent that monitors the health of working conditions: input image of factory output: health safety index 0.0 - 1.0 being the highest
Quality Control agent: Agent that monitors the quality of the product: input image of product output: quality index 0.0 - 1.0 being the highest
Productivity agent: Agent that monitors the productivity of the factory: input image of factory output: productivity index 0.0 - 1.0 being the highest
Safety agent: Agent that monitors the safety of the factory: input image of factory output: safety index 0.0 - 1.0 being the highest
Security agent: Agent that monitors the security of the factory: input image of factory output: security index 0.0 - 1.0 being the highest
Sustainability agent: Agent that monitors the sustainability of the factory: input image of factory output: sustainability index 0.0 - 1.0 being the highest
Efficiency agent: Agent that monitors the efficiency of the factory: input image of factory output: efficiency index 0.0 - 1.0 being the highest
Flow:
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
"""
from swarms.structs import Flow
import os
from dotenv import load_dotenv
from swarms.models import GPT4VisionAPI
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
llm = GPT4VisionAPI(
openai_api_key=api_key
)
assembly_line = "playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg"
red_robots = "playground/demos/swarm_of_mma_manufacturing/red_robots.jpg"
robots = "playground/demos/swarm_of_mma_manufacturing/robots.jpg"
tesla_assembly_line = "playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg"
# Define detailed prompts for each agent
tasks = {
"health_safety": (
"Analyze the factory's working environment for health safety. Focus on"
" cleanliness, ventilation, spacing between workstations, and personal"
" protective equipment availability."
),
"productivity": (
"Review the factory's workflow efficiency, machine utilization, and"
" employee engagement. Identify operational delays or bottlenecks."
),
"safety": (
"Analyze the factory's safety measures, including fire exits, safety"
" signage, and emergency response equipment."
),
"security": (
"Evaluate the factory's security systems, entry/exit controls, and"
" potential vulnerabilities."
),
"sustainability": (
"Inspect the factory's sustainability practices, including waste"
" management, energy usage, and eco-friendly processes."
),
"efficiency": (
"Assess the manufacturing process's efficiency, considering the layout,"
" logistics, and automation level."
),
}
# Define prompts for each agent
health_safety_prompt = tasks["health_safety"]
productivity_prompt = tasks["productivity"]
safety_prompt = tasks["safety"]
security_prompt = tasks["security"]
sustainability_prompt = tasks["sustainability"]
efficiency_prompt = tasks["efficiency"]
# Health security agent
health_security_agent = Flow(
llm=llm,
sop_list=health_safety_prompt,
max_loops=2,
multi_modal=True
)
# Quality control agent
productivity_check_agent = Flow(
llm=llm,
sop=productivity_prompt,
max_loops=2,
multi_modal=True
)
# Security agent
security_check_agent = Flow(
llm=llm,
sop=security_prompt,
max_loops=2,
multi_modal=True
)
# Efficiency agent
efficiency_check_agent = Flow(
llm=llm,
sop=efficiency_prompt,
max_loops=2,
multi_modal=True
)
# Add the first task to the health_security_agent
health_check = health_security_agent.run(
"Analyze the safety of this factory",
robots
)
# Add the third task to the productivity_check_agent
productivity_check = productivity_check_agent.run(
health_check, assembly_line
)
# Add the fourth task to the security_check_agent
security_check = security_check_agent.add(
productivity_check, red_robots
)
# Add the fifth task to the efficiency_check_agent
efficiency_check = efficiency_check_agent.run(
security_check, tesla_assembly_line
)

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

@ -0,0 +1,112 @@
import os
import base64
import requests
from dotenv import load_dotenv
from typing import List
load_dotenv()
class StableDiffusion:
"""
A class to interact with the Stable Diffusion API for image generation.
Attributes:
-----------
api_key : str
The API key for accessing the Stable Diffusion API.
api_host : str
The host URL of the Stable Diffusion API.
engine_id : str
The ID of the Stable Diffusion engine.
headers : dict
The headers for the API request.
output_dir : str
Directory where generated images will be saved.
Methods:
--------
generate_image(prompt: str, cfg_scale: int, height: int, width: int, samples: int, steps: int) -> List[str]:
Generates images based on a text prompt and returns a list of file paths to the generated images.
"""
def __init__(self, api_key: str, api_host: str = "https://api.stability.ai"):
"""
Initializes the StableDiffusion class with the provided API key and host.
Parameters:
-----------
api_key : str
The API key for accessing the Stable Diffusion API.
api_host : str
The host URL of the Stable Diffusion API. Default is "https://api.stability.ai".
"""
self.api_key = api_key
self.api_host = api_host
self.engine_id = "stable-diffusion-v1-6"
self.headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
"Accept": "application/json"
}
self.output_dir = "images"
os.makedirs(self.output_dir, exist_ok=True)
def generate_image(self, prompt: str, cfg_scale: int = 7, height: int = 1024, width: int = 1024, samples: int = 1, steps: int = 30) -> List[str]:
"""
Generates images based on a text prompt.
Parameters:
-----------
prompt : str
The text prompt based on which the image will be generated.
cfg_scale : int
CFG scale parameter for image generation. Default is 7.
height : int
Height of the generated image. Default is 1024.
width : int
Width of the generated image. Default is 1024.
samples : int
Number of images to generate. Default is 1.
steps : int
Number of steps for the generation process. Default is 30.
Returns:
--------
List[str]:
A list of paths to the generated images.
Raises:
-------
Exception:
If the API response is not 200 (OK).
"""
response = requests.post(
f"{self.api_host}/v1/generation/{self.engine_id}/text-to-image",
headers=self.headers,
json={
"text_prompts": [{"text": prompt}],
"cfg_scale": cfg_scale,
"height": height,
"width": width,
"samples": samples,
"steps": steps,
},
)
if response.status_code != 200:
raise Exception(f"Non-200 response: {response.text}")
data = response.json()
image_paths = []
for i, image in enumerate(data["artifacts"]):
image_path = os.path.join(self.output_dir, f"v1_txt2img_{i}.png")
with open(image_path, "wb") as f:
f.write(base64.b64decode(image["base64"]))
image_paths.append(image_path)
return image_paths
# Usage example:
# sd = StableDiffusion("your-api-key")
# images = sd.generate_image("A scenic landscape with mountains")
# print(images)

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
name = "swarms" name = "swarms"
version = "2.4.1" version = "2.4.5"
description = "Swarms - Pytorch" description = "Swarms - Pytorch"
license = "MIT" license = "MIT"
authors = ["Kye Gomez <kye@apac.ai>"] authors = ["Kye Gomez <kye@apac.ai>"]

@ -1,18 +1,6 @@
import logging from swarms.utils.disable_logging import disable_logging
import os
import warnings
warnings.filterwarnings("ignore", category=UserWarning) disable_logging()
# disable tensorflow warnings
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
try:
log = logging.getLogger("pytorch")
log.propagate = False
log.setLevel(logging.ERROR)
except Exception as error:
print(f"Pytorch logging not disabled: {error}")
from swarms.agents import * # noqa: E402, F403 from swarms.agents import * # noqa: E402, F403
from swarms.swarms import * # noqa: E402, F403 from swarms.swarms import * # noqa: E402, F403

@ -1,3 +1,4 @@
from abc import abstractmethod
import asyncio import asyncio
import base64 import base64
import concurrent.futures import concurrent.futures
@ -7,11 +8,54 @@ from io import BytesIO
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import requests import requests
from ABC import abstractmethod
from PIL import Image from PIL import Image
from termcolor import colored
class BaseMultiModalModel: class BaseMultiModalModel:
"""
Base class for multimodal models
Args:
model_name (Optional[str], optional): Model name. Defaults to None.
temperature (Optional[int], optional): Temperature. Defaults to 0.5.
max_tokens (Optional[int], optional): Max tokens. Defaults to 500.
max_workers (Optional[int], optional): Max workers. Defaults to 10.
top_p (Optional[int], optional): Top p. Defaults to 1.
top_k (Optional[int], optional): Top k. Defaults to 50.
beautify (Optional[bool], optional): Beautify. Defaults to False.
device (Optional[str], optional): Device. Defaults to "cuda".
max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500.
retries (Optional[int], optional): Retries. Defaults to 3.
Examples:
>>> from swarms.models.base_multimodal_model import BaseMultiModalModel
>>> model = BaseMultiModalModel()
>>> model.run("Generate a summary of this text")
>>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
>>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.generate_summary("Generate a summary of this text")
>>> model.set_temperature(0.5)
>>> model.set_max_tokens(500)
>>> model.get_generation_time()
>>> model.get_chat_history()
>>> model.get_unique_chat_history()
>>> model.get_chat_history_length()
>>> model.get_unique_chat_history_length()
>>> model.get_chat_history_tokens()
>>> model.print_beautiful("Print this beautifully")
>>> model.stream("Stream this")
>>> model.unique_chat_history()
>>> model.clear_chat_history()
>>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/")
"""
def __init__( def __init__(
self, self,
model_name: Optional[str], model_name: Optional[str],
@ -20,6 +64,7 @@ class BaseMultiModalModel:
max_workers: Optional[int] = 10, max_workers: Optional[int] = 10,
top_p: Optional[int] = 1, top_p: Optional[int] = 1,
top_k: Optional[int] = 50, top_k: Optional[int] = 50,
beautify: Optional[bool] = False,
device: Optional[str] = "cuda", device: Optional[str] = "cuda",
max_new_tokens: Optional[int] = 500, max_new_tokens: Optional[int] = 500,
retries: Optional[int] = 3, retries: Optional[int] = 3,
@ -30,12 +75,12 @@ class BaseMultiModalModel:
self.max_workers = max_workers self.max_workers = max_workers
self.top_p = top_p self.top_p = top_p
self.top_k = top_k self.top_k = top_k
self.beautify = beautify
self.device = device self.device = device
self.max_new_tokens = max_new_tokens self.max_new_tokens = max_new_tokens
self.retries = retries self.retries = retries
self.chat_history = [] self.chat_history = []
@abstractmethod @abstractmethod
def __call__(self, text: str, img: str): def __call__(self, text: str, img: str):
"""Run the model""" """Run the model"""
@ -99,7 +144,6 @@ class BaseMultiModalModel:
for result in results: for result in results:
print(result) print(result)
def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]: def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
"""Process a batch of tasks and images""" """Process a batch of tasks and images"""
with concurrent.futures.ThreadPoolExecutor() as executor: with concurrent.futures.ThreadPoolExecutor() as executor:
@ -207,3 +251,16 @@ class BaseMultiModalModel:
"""Get the chat history tokens""" """Get the chat history tokens"""
return self._num_tokens() return self._num_tokens()
def print_beautiful(self, content: str, color: str = "cyan"):
"""Print Beautifully with termcolor"""
content = colored(content, color)
print(content)
def stream(self, content: str):
"""Stream the output
Args:
content (str): _description_
"""
for chunk in content:
print(chunk)

@ -1,15 +1,22 @@
import asyncio import asyncio
import base64 import base64
import concurrent.futures import concurrent.futures
from termcolor import colored
import json import json
import logging
import os import os
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import List, Tuple from typing import List, Optional, Tuple
import aiohttp import aiohttp
import requests import requests
from dotenv import load_dotenv from dotenv import load_dotenv
from termcolor import colored
try:
import cv2
except ImportError:
print("OpenCV not installed. Please install OpenCV to use this model.")
raise ImportError
# Load environment variables # Load environment variables
load_dotenv() load_dotenv()
@ -54,16 +61,29 @@ class GPT4VisionAPI:
self, self,
openai_api_key: str = openai_api_key, openai_api_key: str = openai_api_key,
model_name: str = "gpt-4-vision-preview", model_name: str = "gpt-4-vision-preview",
logging_enabled: bool = False,
max_workers: int = 10, max_workers: int = 10,
max_tokens: str = 300, max_tokens: str = 300,
openai_proxy: str = "https://api.openai.com/v1/chat/completions", openai_proxy: str = "https://api.openai.com/v1/chat/completions",
beautify: bool = False,
streaming_enabled: Optional[bool] = False,
): ):
super().__init__() super().__init__()
self.openai_api_key = openai_api_key self.openai_api_key = openai_api_key
self.logging_enabled = logging_enabled
self.model_name = model_name self.model_name = model_name
self.max_workers = max_workers self.max_workers = max_workers
self.max_tokens = max_tokens self.max_tokens = max_tokens
self.openai_proxy = openai_proxy self.openai_proxy = openai_proxy
self.beautify = beautify
self.streaming_enabled = streaming_enabled
if self.logging_enabled:
logging.basicConfig(level=logging.DEBUG)
else:
# Disable debug logs for requests and urllib3
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
def encode_image(self, img: str): def encode_image(self, img: str):
"""Encode image to base64.""" """Encode image to base64."""
@ -72,9 +92,10 @@ class GPT4VisionAPI:
def download_img_then_encode(self, img: str): def download_img_then_encode(self, img: str):
"""Download image from URL then encode image to base64 using requests""" """Download image from URL then encode image to base64 using requests"""
pass
# Function to handle vision tasks # Function to handle vision tasks
def run(self, task: str, img: str): def run(self, task: Optional[str] = None, img: Optional[str] = None, *args, **kwargs):
"""Run the model.""" """Run the model."""
try: try:
base64_image = self.encode_image(img) base64_image = self.encode_image(img)
@ -83,7 +104,7 @@ class GPT4VisionAPI:
"Authorization": f"Bearer {openai_api_key}", "Authorization": f"Bearer {openai_api_key}",
} }
payload = { payload = {
"model": self.model_name, "model": "gpt-4-vision-preview",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@ -103,18 +124,109 @@ class GPT4VisionAPI:
"max_tokens": self.max_tokens, "max_tokens": self.max_tokens,
} }
response = requests.post( response = requests.post(
"https://api.openai.com/v1/chat/completions", self.openai_proxy,
headers=headers, headers=headers,
json=payload, json=payload,
) )
out = response.json() out = response.json()
content = out["choices"][0]["message"]["content"] content = out["choices"][0]["message"]["content"]
if self.streaming_enabled:
content = self.stream_response(content)
else:
pass
if self.beautify:
content = colored(content, "cyan")
print(content)
else:
print(content) print(content)
except Exception as error: except Exception as error:
print(f"Error with the request: {error}") print(f"Error with the request: {error}")
raise error raise error
def video_prompt(self, frames):
"""
SystemPrompt is a class that generates a prompt for the user to respond to.
The prompt is generated based on the current state of the system.
Parameters
----------
frames : list
A list of base64 frames
Returns
-------
PROMPT : str
The system prompt
Examples
--------
>>> from swarms.models import GPT4VisionAPI
>>> llm = GPT4VisionAPI()
>>> video = "video.mp4"
>>> base64_frames = llm.process_video(video)
>>> prompt = llm.video_prompt(base64_frames)
>>> print(prompt)
"""
PROMPT = f"""
These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
{frames}
"""
return PROMPT
def stream_response(self, content: str):
"""Stream the response of the output
Args:
content (str): _description_
"""
for chunk in content:
print(chunk)
def process_video(self, video: str):
"""
Process a video into a list of base64 frames
Parameters
----------
video : str
The path to the video file
Returns
-------
base64_frames : list
A list of base64 frames
Examples
--------
>>> from swarms.models import GPT4VisionAPI
>>> llm = GPT4VisionAPI()
>>> video = "video.mp4"
>>> base64_frames = llm.process_video(video)
"""
video = cv2.VideoCapture(video)
base64_frames = []
while video.isOpened():
success, frame = video.read()
if not success:
break
_, buffer = cv2.imencode(".jpg", frame)
base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
video.release()
print(len(base64_frames), "frames read.")
for img in base64_frames:
base64.b64decode(img.encode("utf-8"))
def __call__(self, task: str, img: str): def __call__(self, task: str, img: str):
"""Run the model.""" """Run the model."""
try: try:
@ -151,11 +263,21 @@ class GPT4VisionAPI:
out = response.json() out = response.json()
content = out["choices"][0]["message"]["content"] content = out["choices"][0]["message"]["content"]
if self.streaming_enabled:
content = self.stream_response(content)
else:
pass
if self.beautify:
content = colored(content, "cyan")
print(content) print(content)
else:
print(content)
except Exception as error: except Exception as error:
print(f"Error with the request: {error}") print(f"Error with the request: {error}")
raise error raise error
# Function to handle vision tasks
def run_many( def run_many(
self, self,
@ -165,6 +287,14 @@ class GPT4VisionAPI:
""" """
Run the model on multiple tasks and images all at once using concurrent Run the model on multiple tasks and images all at once using concurrent
Args:
tasks (List[str]): List of tasks
imgs (List[str]): List of image paths
Returns:
List[str]: List of responses
""" """
# Instantiate the thread pool executor # Instantiate the thread pool executor
with ThreadPoolExecutor(max_workers=self.max_workers) as executor: with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
@ -178,8 +308,8 @@ class GPT4VisionAPI:
async def arun( async def arun(
self, self,
task: str, task: Optional[str] = None,
img: str, img: Optional[str] = None,
): ):
""" """
Asynchronously run the model Asynchronously run the model

@ -99,7 +99,9 @@ class WhisperX:
print("The key 'segments' is not found in the result.") print("The key 'segments' is not found in the result.")
def transcribe(self, audio_file): def transcribe(self, audio_file):
model = whisperx_model.load_model("large-v2", self.device, self.compute_type) model = whisperx_model.load_model(
"large-v2", self.device, self.compute_type
)
audio = whisperx_model.load_audio(audio_file) audio = whisperx_model.load_audio(audio_file)
result = model.transcribe(audio, batch_size=self.batch_size) result = model.transcribe(audio, batch_size=self.batch_size)

@ -9,9 +9,12 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
from termcolor import colored from termcolor import colored
from swarms.tools.tool import BaseTool
from swarms.utils.code_interpreter import SubprocessCodeInterpreter from swarms.utils.code_interpreter import SubprocessCodeInterpreter
from swarms.utils.parse_code import extract_code_in_backticks_in_string from swarms.utils.parse_code import extract_code_in_backticks_in_string
from swarms.tools.tool import BaseTool from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
# System prompt # System prompt
FLOW_SYSTEM_PROMPT = f""" FLOW_SYSTEM_PROMPT = f"""
@ -154,7 +157,7 @@ class Flow:
retry_interval (int): The interval between retry attempts retry_interval (int): The interval between retry attempts
interactive (bool): Whether or not to run in interactive mode interactive (bool): Whether or not to run in interactive mode
dashboard (bool): Whether or not to print the dashboard dashboard (bool): Whether or not to print the dashboard
dynamic_temperature(bool): Dynamical temperature handling dynamic_temperature_enabled(bool): Dynamical temperature handling
**kwargs (Any): Any additional keyword arguments **kwargs (Any): Any additional keyword arguments
Methods: Methods:
@ -182,7 +185,6 @@ class Flow:
add_message_to_memory_and_truncate: Add the message to the memory and truncate add_message_to_memory_and_truncate: Add the message to the memory and truncate
print_dashboard: Print dashboard print_dashboard: Print dashboard
activate_autonomous_agent: Print the autonomous agent activation message activate_autonomous_agent: Print the autonomous agent activation message
dynamic_temperature: Dynamically change the temperature
_check_stopping_condition: Check if the stopping condition is met _check_stopping_condition: Check if the stopping condition is met
format_prompt: Format the prompt format_prompt: Format the prompt
get_llm_init_params: Get the llm init params get_llm_init_params: Get the llm init params
@ -240,14 +242,16 @@ class Flow:
agent_description: str = None, agent_description: str = None,
system_prompt: str = FLOW_SYSTEM_PROMPT, system_prompt: str = FLOW_SYSTEM_PROMPT,
tools: List[BaseTool] = None, tools: List[BaseTool] = None,
dynamic_temperature: bool = False, dynamic_temperature_enabled: Optional[bool] = False,
sop: str = None, sop: Optional[str] = None,
sop_list: Optional[List[str]] = None,
saved_state_path: Optional[str] = "flow_state.json", saved_state_path: Optional[str] = "flow_state.json",
autosave: bool = False, autosave: Optional[bool] = False,
context_length: int = 8192, context_length: Optional[int] = 8192,
user_name: str = "Human:", user_name: str = "Human:",
self_healing: bool = False, self_healing_enabled: Optional[bool] = False,
code_interpreter: bool = False, code_interpreter: Optional[bool] = False,
multi_modal: Optional[bool] = None,
**kwargs: Any, **kwargs: Any,
): ):
self.llm = llm self.llm = llm
@ -257,22 +261,17 @@ class Flow:
self.loop_interval = loop_interval self.loop_interval = loop_interval
self.retry_attempts = retry_attempts self.retry_attempts = retry_attempts
self.retry_interval = retry_interval self.retry_interval = retry_interval
self.feedback = []
self.memory = []
self.task = None self.task = None
self.stopping_token = stopping_token # or "<DONE>" self.stopping_token = stopping_token # or "<DONE>"
self.interactive = interactive self.interactive = interactive
self.dashboard = dashboard self.dashboard = dashboard
self.return_history = return_history self.return_history = return_history
self.dynamic_temperature = dynamic_temperature self.dynamic_temperature_enabled = dynamic_temperature_enabled
self.dynamic_loops = dynamic_loops self.dynamic_loops = dynamic_loops
self.user_name = user_name self.user_name = user_name
self.context_length = context_length self.context_length = context_length
# SOPS to inject into the system prompt
self.sop = sop self.sop = sop
# The max_loops will be set dynamically if the dynamic_loop self.sop_list = sop_list
if self.dynamic_loops:
self.max_loops = "auto"
self.tools = tools or [] self.tools = tools or []
self.system_prompt = system_prompt self.system_prompt = system_prompt
self.agent_name = agent_name self.agent_name = agent_name
@ -280,8 +279,27 @@ class Flow:
self.saved_state_path = saved_state_path self.saved_state_path = saved_state_path
self.autosave = autosave self.autosave = autosave
self.response_filters = [] self.response_filters = []
self.self_healing = self_healing self.self_healing_enabled = self_healing_enabled
self.code_interpreter = code_interpreter self.code_interpreter = code_interpreter
self.multi_modal = multi_modal
# The max_loops will be set dynamically if the dynamic_loop
if self.dynamic_loops:
self.max_loops = "auto"
# If multimodal = yes then set the sop to the multimodal sop
if self.multi_modal:
self.sop = MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1
# If the user inputs a list of strings for the sop then join them and set the sop
if self.sop_list:
self.sop = "\n".join(self.sop_list)
# Memory
self.feedback = []
self.memory = []
# Initialize the code executor
self.code_executor = SubprocessCodeInterpreter() self.code_executor = SubprocessCodeInterpreter()
def provide_feedback(self, feedback: str) -> None: def provide_feedback(self, feedback: str) -> None:
@ -461,7 +479,7 @@ class Flow:
Retry Interval: {self.retry_interval} Retry Interval: {self.retry_interval}
Interactive: {self.interactive} Interactive: {self.interactive}
Dashboard: {self.dashboard} Dashboard: {self.dashboard}
Dynamic Temperature: {self.dynamic_temperature} Dynamic Temperature: {self.dynamic_temperature_enabled}
Autosave: {self.autosave} Autosave: {self.autosave}
Saved State: {self.saved_state_path} Saved State: {self.saved_state_path}
Model Configuration: {model_config} Model Configuration: {model_config}
@ -498,7 +516,7 @@ class Flow:
) )
print(error) print(error)
def run(self, task: str, img: Optional[str], **kwargs): def run(self, task: Optional[str], img: Optional[str] = None, **kwargs):
""" """
Run the autonomous agent loop Run the autonomous agent loop
@ -528,7 +546,10 @@ class Flow:
self.print_dashboard(task) self.print_dashboard(task)
loop_count = 0 loop_count = 0
# While the max_loops is auto or the loop count is less than the max_loops
while self.max_loops == "auto" or loop_count < self.max_loops: while self.max_loops == "auto" or loop_count < self.max_loops:
# Loop count
loop_count += 1 loop_count += 1
print( print(
colored(f"\nLoop {loop_count} of {self.max_loops}", "blue") colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")
@ -543,7 +564,7 @@ class Flow:
break break
# Adjust temperature, comment if no work # Adjust temperature, comment if no work
if self.dynamic_temperature: if self.dynamic_temperature_enabled:
self.dynamic_temperature() self.dynamic_temperature()
# Preparing the prompt # Preparing the prompt
@ -649,7 +670,7 @@ class Flow:
break break
# Adjust temperature, comment if no work # Adjust temperature, comment if no work
if self.dynamic_temperature: if self.dynamic_temperature_enabled:
self.dynamic_temperature() self.dynamic_temperature()
# Preparing the prompt # Preparing the prompt
@ -994,7 +1015,7 @@ class Flow:
"retry_interval": self.retry_interval, "retry_interval": self.retry_interval,
"interactive": self.interactive, "interactive": self.interactive,
"dashboard": self.dashboard, "dashboard": self.dashboard,
"dynamic_temperature": self.dynamic_temperature, "dynamic_temperature": self.dynamic_temperature_enabled,
} }
with open(file_path, "w") as f: with open(file_path, "w") as f:

@ -29,6 +29,18 @@ class Task:
Task class for running a task in a sequential workflow. Task class for running a task in a sequential workflow.
Args:
description (str): The description of the task.
flow (Union[Callable, Flow]): The model or flow to execute the task.
args (List[Any]): Additional arguments to pass to the task execution.
kwargs (Dict[str, Any]): Additional keyword arguments to pass to the task execution.
result (Any): The result of the task execution.
history (List[Any]): The history of the task execution.
Methods:
execute: Execute the task.
Examples: Examples:
>>> from swarms.structs import Task, Flow >>> from swarms.structs import Task, Flow
>>> from swarms.models import OpenAIChat >>> from swarms.models import OpenAIChat
@ -37,8 +49,6 @@ class Task:
>>> task.execute() >>> task.execute()
>>> task.result >>> task.result
""" """
description: str description: str
@ -54,9 +64,6 @@ class Task:
Raises: Raises:
ValueError: If a Flow instance is used as a task and the 'task' argument is not provided. ValueError: If a Flow instance is used as a task and the 'task' argument is not provided.
""" """
if isinstance(self.flow, Flow): if isinstance(self.flow, Flow):
# Add a prompt to notify the Flow of the sequential workflow # Add a prompt to notify the Flow of the sequential workflow
@ -114,14 +121,20 @@ class SequentialWorkflow:
dashboard: bool = False dashboard: bool = False
def add( def add(
self, task: str, flow: Union[Callable, Flow], *args, **kwargs self,
flow: Union[Callable, Flow],
task: Optional[str] = None,
img: Optional[str] = None,
*args,
**kwargs,
) -> None: ) -> None:
""" """
Add a task to the workflow. Add a task to the workflow.
Args: Args:
task (str): The task description or the initial input for the Flow.
flow (Union[Callable, Flow]): The model or flow to execute the task. flow (Union[Callable, Flow]): The model or flow to execute the task.
task (str): The task description or the initial input for the Flow.
img (str): The image to understand for the task.
*args: Additional arguments to pass to the task execution. *args: Additional arguments to pass to the task execution.
**kwargs: Additional keyword arguments to pass to the task execution. **kwargs: Additional keyword arguments to pass to the task execution.
""" """
@ -130,8 +143,21 @@ class SequentialWorkflow:
kwargs["task"] = task # Set the task as a keyword argument for Flow kwargs["task"] = task # Set the task as a keyword argument for Flow
# Append the task to the tasks list # Append the task to the tasks list
if self.img:
self.tasks.append(
Task(
description=task,
flow=flow,
args=list(args),
kwargs=kwargs,
img=img,
)
)
else:
self.tasks.append( self.tasks.append(
Task(description=task, flow=flow, args=list(args), kwargs=kwargs) Task(
description=task, flow=flow, args=list(args), kwargs=kwargs
)
) )
def reset_workflow(self) -> None: def reset_workflow(self) -> None:
@ -148,18 +174,16 @@ class SequentialWorkflow:
""" """
return {task.description: task.result for task in self.tasks} return {task.description: task.result for task in self.tasks}
def remove_task(self, task_description: str) -> None: def remove_task(self, task: str) -> None:
"""Remove tasks from sequential workflow""" """Remove tasks from sequential workflow"""
self.tasks = [ self.tasks = [task for task in self.tasks if task.description != task]
task for task in self.tasks if task.description != task_description
]
def update_task(self, task_description: str, **updates) -> None: def update_task(self, task: str, **updates) -> None:
""" """
Updates the arguments of a task in the workflow. Updates the arguments of a task in the workflow.
Args: Args:
task_description (str): The description of the task to update. task (str): The description of the task to update.
**updates: The updates to apply to the task. **updates: The updates to apply to the task.
Raises: Raises:
@ -178,11 +202,11 @@ class SequentialWorkflow:
""" """
for task in self.tasks: for task in self.tasks:
if task.description == task_description: if task.description == task:
task.kwargs.update(updates) task.kwargs.update(updates)
break break
else: else:
raise ValueError(f"Task {task_description} not found in workflow.") raise ValueError(f"Task {task} not found in workflow.")
def save_workflow_state( def save_workflow_state(
self, self,
@ -272,6 +296,7 @@ class SequentialWorkflow:
) )
def workflow_shutdown(self, **kwargs) -> None: def workflow_shutdown(self, **kwargs) -> None:
"""Shuts down the workflow."""
print( print(
colored( colored(
""" """
@ -282,6 +307,7 @@ class SequentialWorkflow:
) )
def add_objective_to_workflow(self, task: str, **kwargs) -> None: def add_objective_to_workflow(self, task: str, **kwargs) -> None:
"""Adds an objective to the workflow."""
print( print(
colored( colored(
""" """

@ -0,0 +1,30 @@
import logging
import os
import warnings
def disable_logging():
warnings.filterwarnings("ignore", category=UserWarning)
# disable tensorflow warnings
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# Set the logging level for the entire module
logging.basicConfig(level=logging.WARNING)
try:
log = logging.getLogger("pytorch")
log.propagate = False
log.setLevel(logging.ERROR)
except Exception as error:
print(f"Pytorch logging not disabled: {error}")
for logger_name in [
"tensorflow",
"h5py",
"numexpr",
"git",
"wandb.docker.auth",
]:
logger = logging.getLogger(logger_name)
logger.setLevel(logging.WARNING) # Supress DEBUG and info logs
Loading…
Cancel
Save