[FEAT][Flow.run() img = None for conditional img inputs, BaseMultiModalModel, and multi modal swarms of manufacturing agents

pull/197/head
Kye 1 year ago
parent f895497f88
commit a92a6a5c13

@ -1,9 +1,21 @@
import os
from dotenv import load_dotenv
# Import the OpenAIChat model and the Flow struct
from swarms.models import OpenAIChat from swarms.models import OpenAIChat
from swarms.structs import Flow from swarms.structs import Flow
# Load the environment variables
load_dotenv()
# Get the API key from the environment
api_key = os.environ.get("OPENAI_API_KEY")
# Initialize the language model # Initialize the language model
llm = OpenAIChat( llm = OpenAIChat(
temperature=0.5, temperature=0.5,
openai_api_key=api_key,
) )

@ -1,5 +1,8 @@
from swarms.structs import Flow from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)
llm = GPT4VisionAPI() llm = GPT4VisionAPI()
@ -10,6 +13,7 @@ img = "images/swarms.jpeg"
## Initialize the workflow ## Initialize the workflow
flow = Flow( flow = Flow(
llm=llm, llm=llm,
sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
max_loops="auto", max_loops="auto",
) )

@ -0,0 +1,7 @@
"""
Idea 2 img
task -> gpt4 text -> dalle3 img -> gpt4vision img + text analyze img -> dalle3 img -> loop
"""
from swarms.models.gpt4_vision_api import GPT4VisionAPI

@ -0,0 +1,15 @@
"""
Swarm of multi modal autonomous agents for manufacturing!
---------------------------------------------------------
Health Security agent: Agent that monitors the health of working conditions: input image of factory output: health safety index 0.0 - 1.0 being the highest
Quality Control agent: Agent that monitors the quality of the product: input image of product output: quality index 0.0 - 1.0 being the highest
Productivity agent: Agent that monitors the productivity of the factory: input image of factory output: productivity index 0.0 - 1.0 being the highest
Safety agent: Agent that monitors the safety of the factory: input image of factory output: safety index 0.0 - 1.0 being the highest
Security agent: Agent that monitors the security of the factory: input image of factory output: security index 0.0 - 1.0 being the highest
Sustainability agent: Agent that monitors the sustainability of the factory: input image of factory output: sustainability index 0.0 - 1.0 being the highest
Efficiency agent: Agent that monitors the efficiency of the factory: input image of factory output: efficiency index 0.0 - 1.0 being the highest
Flow:
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
"""

@ -1,3 +1,4 @@
from abc import abstractmethod
import asyncio import asyncio
import base64 import base64
import concurrent.futures import concurrent.futures
@ -7,8 +8,8 @@ from io import BytesIO
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import requests import requests
from ABC import abstractmethod
from PIL import Image from PIL import Image
from termcolor import colored
class BaseMultiModalModel: class BaseMultiModalModel:
@ -37,7 +38,6 @@ class BaseMultiModalModel:
self.retries = retries self.retries = retries
self.chat_history = [] self.chat_history = []
@abstractmethod @abstractmethod
def __call__(self, text: str, img: str): def __call__(self, text: str, img: str):
"""Run the model""" """Run the model"""
@ -61,17 +61,17 @@ class BaseMultiModalModel:
except requests.RequestException as error: except requests.RequestException as error:
print(f"Error fetching image from {img} and error: {error}") print(f"Error fetching image from {img} and error: {error}")
return None return None
def encode_img(self, img: str): def encode_img(self, img: str):
"""Encode the image to base64""" """Encode the image to base64"""
with open(img, "rb") as image_file: with open(img, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8") return base64.b64encode(image_file.read()).decode("utf-8")
def get_img(self, img: str): def get_img(self, img: str):
"""Get the image from the path""" """Get the image from the path"""
image_pil = Image.open(img) image_pil = Image.open(img)
return image_pil return image_pil
def clear_chat_history(self): def clear_chat_history(self):
"""Clear the chat history""" """Clear the chat history"""
self.chat_history = [] self.chat_history = []
@ -87,11 +87,11 @@ class BaseMultiModalModel:
Args: Args:
tasks (List[str]): List of tasks tasks (List[str]): List of tasks
imgs (List[str]): List of image paths imgs (List[str]): List of image paths
Returns: Returns:
List[str]: List of responses List[str]: List of responses
""" """
# Instantiate the thread pool executor # Instantiate the thread pool executor
with ThreadPoolExecutor(max_workers=self.max_workers) as executor: with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
@ -101,7 +101,6 @@ class BaseMultiModalModel:
for result in results: for result in results:
print(result) print(result)
def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]: def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
"""Process a batch of tasks and images""" """Process a batch of tasks and images"""
with concurrent.futures.ThreadPoolExecutor() as executor: with concurrent.futures.ThreadPoolExecutor() as executor:
@ -133,11 +132,11 @@ class BaseMultiModalModel:
for task, img in tasks_images for task, img in tasks_images
] ]
return await asyncio.gather(*futures) return await asyncio.gather(*futures)
def unique_chat_history(self): def unique_chat_history(self):
"""Get the unique chat history""" """Get the unique chat history"""
return list(set(self.chat_history)) return list(set(self.chat_history))
def run_with_retries(self, task: str, img: str): def run_with_retries(self, task: str, img: str):
"""Run the model with retries""" """Run the model with retries"""
for i in range(self.retries): for i in range(self.retries):
@ -146,7 +145,7 @@ class BaseMultiModalModel:
except Exception as error: except Exception as error:
print(f"Error with the request {error}") print(f"Error with the request {error}")
continue continue
def run_batch_with_retries(self, tasks_images: List[Tuple[str, str]]): def run_batch_with_retries(self, tasks_images: List[Tuple[str, str]]):
"""Run the model with retries""" """Run the model with retries"""
for i in range(self.retries): for i in range(self.retries):
@ -188,28 +187,37 @@ class BaseMultiModalModel:
if self.start_time and self.end_time: if self.start_time and self.end_time:
return self.end_time - self.start_time return self.end_time - self.start_time
return 0 return 0
def get_chat_history(self): def get_chat_history(self):
"""Get the chat history""" """Get the chat history"""
return self.chat_history return self.chat_history
def get_unique_chat_history(self): def get_unique_chat_history(self):
"""Get the unique chat history""" """Get the unique chat history"""
return list(set(self.chat_history)) return list(set(self.chat_history))
def get_chat_history_length(self): def get_chat_history_length(self):
"""Get the chat history length""" """Get the chat history length"""
return len(self.chat_history) return len(self.chat_history)
def get_unique_chat_history_length(self): def get_unique_chat_history_length(self):
"""Get the unique chat history length""" """Get the unique chat history length"""
return len(list(set(self.chat_history))) return len(list(set(self.chat_history)))
def get_chat_history_tokens(self): def get_chat_history_tokens(self):
"""Get the chat history tokens""" """Get the chat history tokens"""
return self._num_tokens() return self._num_tokens()
def print_beautiful(self, content: str, color: str = "cyan"): def print_beautiful(self, content: str, color: str = "cyan"):
"""Print Beautifully with termcolor""" """Print Beautifully with termcolor"""
content = colored(content, color) content = colored(content, color)
print(content) print(content)
def stream(self, content: str):
"""Stream the output
Args:
content (str): _description_
"""
for chunk in content:
print(chunk)

@ -1,6 +1,7 @@
import logging import logging
import asyncio import asyncio
import base64 import base64
from typing import Optional
import concurrent.futures import concurrent.futures
from termcolor import colored from termcolor import colored
import json import json
@ -12,6 +13,13 @@ import aiohttp
import requests import requests
from dotenv import load_dotenv from dotenv import load_dotenv
try:
import cv2
except ImportError:
print("OpenCV not installed. Please install OpenCV to use this model.")
raise ImportError
# Load environment variables # Load environment variables
load_dotenv() load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY") openai_api_key = os.getenv("OPENAI_API_KEY")
@ -59,7 +67,8 @@ class GPT4VisionAPI:
max_workers: int = 10, max_workers: int = 10,
max_tokens: str = 300, max_tokens: str = 300,
openai_proxy: str = "https://api.openai.com/v1/chat/completions", openai_proxy: str = "https://api.openai.com/v1/chat/completions",
beautify: bool = False beautify: bool = False,
streaming_enabled: Optional[bool] = False,
): ):
super().__init__() super().__init__()
self.openai_api_key = openai_api_key self.openai_api_key = openai_api_key
@ -69,6 +78,7 @@ class GPT4VisionAPI:
self.max_tokens = max_tokens self.max_tokens = max_tokens
self.openai_proxy = openai_proxy self.openai_proxy = openai_proxy
self.beautify = beautify self.beautify = beautify
self.streaming_enabled = streaming_enabled
if self.logging_enabled: if self.logging_enabled:
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
@ -123,14 +133,101 @@ class GPT4VisionAPI:
out = response.json() out = response.json()
content = out["choices"][0]["message"]["content"] content = out["choices"][0]["message"]["content"]
if self.streaming_enabled:
content = self.stream_response(content)
else:
pass
if self.beautify: if self.beautify:
content = colored(content, "cyan") content = colored(content, "cyan")
print(content)
else: else:
print(content) print(content)
except Exception as error: except Exception as error:
print(f"Error with the request: {error}") print(f"Error with the request: {error}")
raise error raise error
def video_prompt(self, frames):
"""
SystemPrompt is a class that generates a prompt for the user to respond to.
The prompt is generated based on the current state of the system.
Parameters
----------
frames : list
A list of base64 frames
Returns
-------
PROMPT : str
The system prompt
Examples
--------
>>> from swarms.models import GPT4VisionAPI
>>> llm = GPT4VisionAPI()
>>> video = "video.mp4"
>>> base64_frames = llm.process_video(video)
>>> prompt = llm.video_prompt(base64_frames)
>>> print(prompt)
"""
PROMPT = f"""
These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
{frames}
"""
return PROMPT
def stream_response(self, content: str):
"""Stream the response of the output
Args:
content (str): _description_
"""
for chunk in content:
print(chunk)
def process_video(self, video: str):
"""
Process a video into a list of base64 frames
Parameters
----------
video : str
The path to the video file
Returns
-------
base64_frames : list
A list of base64 frames
Examples
--------
>>> from swarms.models import GPT4VisionAPI
>>> llm = GPT4VisionAPI()
>>> video = "video.mp4"
>>> base64_frames = llm.process_video(video)
"""
video = cv2.VideoCapture(video)
base64_frames = []
while video.isOpened():
success, frame = video.read()
if not success:
break
_, buffer = cv2.imencode(".jpg", frame)
base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
video.release()
print(len(base64_frames), "frames read.")
for img in base64_frames:
base64.b64decode(img.encode("utf-8"))
def __call__(self, task: str, img: str): def __call__(self, task: str, img: str):
"""Run the model.""" """Run the model."""
try: try:
@ -168,10 +265,17 @@ class GPT4VisionAPI:
out = response.json() out = response.json()
content = out["choices"][0]["message"]["content"] content = out["choices"][0]["message"]["content"]
if self.streaming_enabled:
content = self.stream_response(content)
else:
pass
if self.beautify: if self.beautify:
content = colored(content, "cyan") content = colored(content, "cyan")
print(content)
else: else:
print(content) print(content)
except Exception as error: except Exception as error:
print(f"Error with the request: {error}") print(f"Error with the request: {error}")
raise error raise error

@ -24,7 +24,7 @@ class Kosmos:
---------- ----------
model_name : str model_name : str
Path to the pretrained model Path to the pretrained model
Examples Examples
-------- --------
>>> kosmos = Kosmos() >>> kosmos = Kosmos()

@ -99,7 +99,9 @@ class WhisperX:
print("The key 'segments' is not found in the result.") print("The key 'segments' is not found in the result.")
def transcribe(self, audio_file): def transcribe(self, audio_file):
model = whisperx_model.load_model("large-v2", self.device, self.compute_type) model = whisperx_model.load_model(
"large-v2", self.device, self.compute_type
)
audio = whisperx_model.load_audio(audio_file) audio = whisperx_model.load_audio(audio_file)
result = model.transcribe(audio, batch_size=self.batch_size) result = model.transcribe(audio, batch_size=self.batch_size)

@ -498,7 +498,7 @@ class Flow:
) )
print(error) print(error)
def run(self, task: str, img: Optional[str], **kwargs): def run(self, task: Optional[str], img: Optional[str] = None, **kwargs):
""" """
Run the autonomous agent loop Run the autonomous agent loop
@ -528,7 +528,11 @@ class Flow:
self.print_dashboard(task) self.print_dashboard(task)
loop_count = 0 loop_count = 0
# While the max_loops is auto or the loop count is less than the max_loops
while self.max_loops == "auto" or loop_count < self.max_loops: while self.max_loops == "auto" or loop_count < self.max_loops:
# Loop count
loop_count += 1 loop_count += 1
print( print(
colored(f"\nLoop {loop_count} of {self.max_loops}", "blue") colored(f"\nLoop {loop_count} of {self.max_loops}", "blue")

@ -1,15 +1,14 @@
import logging import logging
import os import os
import warnings import warnings
def disable_logging(): def disable_logging():
warnings.filterwarnings("ignore", category=UserWarning) warnings.filterwarnings("ignore", category=UserWarning)
# disable tensorflow warnings # disable tensorflow warnings
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# Set the logging level for the entire module # Set the logging level for the entire module
logging.basicConfig(level=logging.WARNING) logging.basicConfig(level=logging.WARNING)
@ -20,6 +19,12 @@ def disable_logging():
except Exception as error: except Exception as error:
print(f"Pytorch logging not disabled: {error}") print(f"Pytorch logging not disabled: {error}")
for logger_name in ['tensorflow', 'h5py', 'numexpr', 'git', 'wandb.docker.auth']: for logger_name in [
"tensorflow",
"h5py",
"numexpr",
"git",
"wandb.docker.auth",
]:
logger = logging.getLogger(logger_name) logger = logging.getLogger(logger_name)
logger.setLevel(logging.WARNING) # Supress DEBUG and info logs logger.setLevel(logging.WARNING) # Supress DEBUG and info logs

Loading…
Cancel
Save