gpt4vision features and api

pull/197/head
Kye 1 year ago
parent b9fea7b9c8
commit 9390efb8aa

@ -117,6 +117,32 @@ workflow.run()
for task in workflow.tasks: for task in workflow.tasks:
print(f"Task: {task.description}, Result: {task.result}") print(f"Task: {task.description}, Result: {task.result}")
```
## `Multi Modal Autonomous Agents`
- Run the flow with multiple modalities, which is useful for a variety of real-world tasks in manufacturing, logistics, and healthcare.
```python
from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI
# Initialize the llm
llm = GPT4VisionAPI()
task = "Analyze this image of an assembly line and identify any issues such as misaligned parts, defects, or deviations from the standard assembly process. IF there is anything unsafe in the image, explain why it is unsafe and how it could be improved."
img = "assembly_line.jpg"
## Initialize the workflow
flow = Flow(
llm=llm,
max_loops=1,
dashboard=True,
)
# Run the flow
flow.run(task=task, img=img)
``` ```
--- ---

@ -11,7 +11,6 @@ img = "images/swarms.jpeg"
flow = Flow( flow = Flow(
llm=llm, llm=llm,
max_loops="auto", max_loops="auto",
) )
flow.run(task=task, img=img) flow.run(task=task, img=img)

@ -4,7 +4,12 @@ from swarms.models.gpt4_vision_api import GPT4VisionAPI
llm = GPT4VisionAPI() llm = GPT4VisionAPI()
task = "Analyze this image of an assembly line and identify any issues such as misaligned parts, defects, or deviations from the standard assembly process. IF there is anything unsafe in the image, explain why it is unsafe and how it could be improved." task = (
"Analyze this image of an assembly line and identify any issues such as"
" misaligned parts, defects, or deviations from the standard assembly"
" process. IF there is anything unsafe in the image, explain why it is"
" unsafe and how it could be improved."
)
img = "assembly_line.jpg" img = "assembly_line.jpg"
## Initialize the workflow ## Initialize the workflow

File diff suppressed because it is too large Load Diff

@ -1,8 +1,15 @@
import asyncio
import base64 import base64
import concurrent.futures
from termcolor import colored
import json
import os import os
from concurrent.futures import ThreadPoolExecutor
from typing import List, Tuple
import aiohttp
import requests import requests
from dotenv import load_dotenv from dotenv import load_dotenv
import concurrent.futures
# Load environment variables # Load environment variables
load_dotenv() load_dotenv()
@ -22,7 +29,7 @@ class GPT4VisionAPI:
max_tokens : int max_tokens : int
The maximum number of tokens to generate. Defaults to 300. The maximum number of tokens to generate. Defaults to 300.
Methods Methods
------- -------
encode_image(img: str) encode_image(img: str)
@ -43,16 +50,29 @@ class GPT4VisionAPI:
""" """
def __init__(self, openai_api_key: str = openai_api_key, max_tokens: str = 300): def __init__(
self,
openai_api_key: str = openai_api_key,
model_name: str = "gpt-4-vision-preview",
max_workers: int = 10,
max_tokens: str = 300,
openai_proxy: str = "https://api.openai.com/v1/chat/completions",
):
super().__init__() super().__init__()
self.openai_api_key = openai_api_key self.openai_api_key = openai_api_key
self.model_name = model_name
self.max_workers = max_workers
self.max_tokens = max_tokens self.max_tokens = max_tokens
self.openai_proxy = openai_proxy
def encode_image(self, img: str): def encode_image(self, img: str):
"""Encode image to base64.""" """Encode image to base64."""
with open(img, "rb") as image_file: with open(img, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8") return base64.b64encode(image_file.read()).decode("utf-8")
def download_img_then_encode(self, img: str):
    """Download image from URL then encode image to base64 using requests.

    Parameters
    ----------
    img : str
        URL of the image to download.

    Returns
    -------
    str
        The downloaded bytes, base64-encoded and decoded as UTF-8 text
        (the same form ``encode_image`` produces for a local file).

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or the server answers with an
        HTTP error status.
    """
    # requests waits indefinitely unless a timeout is supplied.
    response = requests.get(img, timeout=30)
    # Fail loudly rather than base64-encoding an HTML error page.
    response.raise_for_status()
    return base64.b64encode(response.content).decode("utf-8")
# Function to handle vision tasks # Function to handle vision tasks
def run(self, task: str, img: str): def run(self, task: str, img: str):
"""Run the model.""" """Run the model."""
@ -63,7 +83,7 @@ class GPT4VisionAPI:
"Authorization": f"Bearer {openai_api_key}", "Authorization": f"Bearer {openai_api_key}",
} }
payload = { payload = {
"model": "gpt-4-vision-preview", "model": self.model_name,
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@ -125,7 +145,7 @@ class GPT4VisionAPI:
"max_tokens": self.max_tokens, "max_tokens": self.max_tokens,
} }
response = requests.post( response = requests.post(
"https://api.openai.com/v1/chat/completions", self.openai_proxy,
headers=headers, headers=headers,
json=payload, json=payload,
) )
@ -138,3 +158,135 @@ class GPT4VisionAPI:
raise error raise error
# Function to handle vision tasks # Function to handle vision tasks
def run_many(
    self,
    tasks: List[str],
    imgs: List[str],
):
    """
    Run the model on multiple tasks and images concurrently.

    Tasks and images are paired by position and each pair is passed to
    ``self.run`` on a thread pool with ``self.max_workers`` workers.

    Parameters
    ----------
    tasks : List[str]
        The tasks to run.
    imgs : List[str]
        The images to run the tasks on; ``imgs[i]`` goes with ``tasks[i]``.

    Returns
    -------
    list
        The value returned by ``self.run`` for each pair, in input order.
    """
    # Instantiate the thread pool executor
    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
        # executor.map yields a one-shot iterator. Materialise it before
        # anything else reads it, otherwise the debug loop below would
        # drain it and the method would return an empty list.
        results = list(executor.map(self.run, tasks, imgs))

    # Print the results for debugging
    for result in results:
        print(result)

    return results
async def arun(
    self,
    task: str,
    img: str,
):
    """
    Asynchronously run the model

    Overview:
    ---------
    Encodes the image with ``self.encode_image``, posts a chat-completion
    request to ``self.openai_proxy`` through aiohttp, prints the content of
    the first choice and returns it.

    Parameters:
    ----------
    task : str
        The task to run the model on.
    img : str
        The image to run the task on

    Returns:
    -------
    The ``content`` field of the first choice in the JSON response.

    Raises:
    ------
    Exception
        Any error raised while encoding, sending or decoding is printed
        and re-raised unchanged.
    """
    try:
        base64_image = self.encode_image(img)
        headers = {
            "Content-Type": "application/json",
            # Use the key given to the constructor rather than the
            # module-level default, so a per-instance key is honoured.
            "Authorization": f"Bearer {self.openai_api_key}",
        }
        payload = {
            # Same configurable model name that the synchronous run() sends.
            "model": self.model_name,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": task},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": (
                                    f"data:image/jpeg;base64,{base64_image}"
                                )
                            },
                        },
                    ],
                }
            ],
            "max_tokens": self.max_tokens,
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                self.openai_proxy, headers=headers, data=json.dumps(payload)
            ) as response:
                out = await response.json()
                content = out["choices"][0]["message"]["content"]
                print(content)
                # Hand the answer back to the caller; previously it was
                # only printed and the coroutine resolved to None.
                return content
    except Exception as error:
        print(f"Error with the request {error}")
        raise error
def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
    """Run ``self.run`` for every ``(task, img)`` pair on a thread pool.

    All pairs are submitted first, then their results are collected in
    submission order.
    """
    pool = concurrent.futures.ThreadPoolExecutor()
    with pool:
        pending = []
        for prompt, image in tasks_images:
            pending.append(pool.submit(self.run, prompt, image))

        outputs = []
        for job in pending:
            # result() blocks until the job finishes and re-raises its error.
            outputs.append(job.result())
    return outputs
async def run_batch_async(
    self, tasks_images: List[Tuple[str, str]]
) -> List[str]:
    """Process a batch of tasks and images asynchronously.

    Each ``(task, img)`` pair is handed to ``self.run`` on the event loop's
    default executor and the coroutine waits for all of them to finish.

    Parameters
    ----------
    tasks_images : List[Tuple[str, str]]
        ``(task, img)`` pairs passed positionally to ``self.run``.

    Returns
    -------
    list
        The value ``self.run`` returned for each pair, in input order.
    """
    # This body only executes inside a running loop, so ask for that loop
    # directly instead of going through the policy-based get_event_loop().
    loop = asyncio.get_running_loop()
    futures = [
        loop.run_in_executor(None, self.run, task, img)
        for task, img in tasks_images
    ]
    return await asyncio.gather(*futures)
async def run_batch_async_with_retries(
    self, tasks_images: List[Tuple[str, str]]
) -> List[str]:
    """Process a batch of tasks and images asynchronously with retries.

    Each ``(task, img)`` pair is handed to ``self.run_with_retries`` on the
    event loop's default executor and the coroutine waits for all of them.

    NOTE(review): ``run_with_retries`` is not defined in the part of the
    file visible here - confirm it exists on the class and takes
    ``(task, img)``.

    Parameters
    ----------
    tasks_images : List[Tuple[str, str]]
        ``(task, img)`` pairs passed positionally to
        ``self.run_with_retries``.

    Returns
    -------
    list
        The value returned for each pair, in input order.
    """
    # This body only executes inside a running loop, so ask for that loop
    # directly instead of going through the policy-based get_event_loop().
    loop = asyncio.get_running_loop()
    futures = [
        loop.run_in_executor(None, self.run_with_retries, task, img)
        for task, img in tasks_images
    ]
    return await asyncio.gather(*futures)
def health_check(self):
    """Health check for the GPT4Vision model.

    Sends a GET request to the OpenAI engines endpoint and reports whether
    it answered with HTTP 200.

    Returns
    -------
    bool
        True when the response status is 200; False for any other status
        or when the request raises ``requests.RequestException`` (which
        includes timeouts).
    """
    # The API rejects unauthenticated calls, so the check can only succeed
    # when the instance's key is sent along.
    headers = {"Authorization": f"Bearer {self.openai_api_key}"}
    try:
        response = requests.get(
            "https://api.openai.com/v1/engines",
            headers=headers,
            # requests waits forever by default; a health check must not hang.
            timeout=10,
        )
        return response.status_code == 200
    except requests.RequestException as error:
        print(f"Health check failed: {error}")
        return False
def print_dashboard(self):
    """Print a green summary of the client configuration and return it.

    The summary lists ``self.model_name``, ``self.max_workers`` and
    ``self.openai_proxy``.

    Returns
    -------
    str
        The coloured text that was printed. (Previously the method returned
        the result of ``print``, which is always None.)
    """
    # Build the text first so the same value can be printed and returned.
    dashboard = colored(
        f"""
GPT4Vision Dashboard
-------------------
Model: {self.model_name}
Max Workers: {self.max_workers}
OpenAIProxy: {self.openai_proxy}
""",
        "green",
    )
    print(dashboard)
    return dashboard

@ -1,9 +1,12 @@
import asyncio
import os
from unittest.mock import AsyncMock, Mock, mock_open, patch
from aiohttp import ClientResponseError
import pytest import pytest
from unittest.mock import mock_open, patch, Mock from dotenv import load_dotenv
from requests.exceptions import RequestException from requests.exceptions import RequestException
from swarms.models.gpt4_vision_api import GPT4VisionAPI from swarms.models.gpt4_vision_api import GPT4VisionAPI
import os
from dotenv import load_dotenv
load_dotenv() load_dotenv()
@ -120,3 +123,116 @@ def test_call_method_with_exception(gpt_api):
with patch("requests.post", side_effect=Exception("Test Exception")): with patch("requests.post", side_effect=Exception("Test Exception")):
with pytest.raises(Exception): with pytest.raises(Exception):
gpt_api(task, img_url) gpt_api(task, img_url)
@pytest.mark.asyncio
async def test_arun_success(vision_api):
    """arun is expected to return the content of the first choice.

    ``aiohttp.ClientSession.post`` is replaced by an AsyncMock whose
    response object yields a canned payload from ``json()``.
    ``img`` is a module-level name defined outside this view.
    """
    fake_response = AsyncMock(
        json=AsyncMock(
            return_value={
                "choices": [
                    {"message": {"content": "This is the model's response."}}
                ]
            }
        )
    )
    post_patch = patch(
        "aiohttp.ClientSession.post",
        new_callable=AsyncMock,
        return_value=fake_response,
    )
    with post_patch as fake_post:
        answer = await vision_api.arun("What is this?", img)
        fake_post.assert_called_once()
        assert answer == "This is the model's response."
@pytest.mark.asyncio
async def test_arun_request_error(vision_api):
    """arun is expected to propagate an exception raised by the POST call."""
    failing_post = patch(
        "aiohttp.ClientSession.post",
        new_callable=AsyncMock,
        side_effect=Exception("Request Error"),
    )
    with failing_post, pytest.raises(Exception):
        await vision_api.arun("What is this?", img)
def test_run_many_success(vision_api):
    """run_many is expected to issue one POST per task and return each reply.

    ``requests.post`` is patched to return a Mock whose ``json()`` gives a
    canned payload. ``img`` is a module-level name defined outside this view.
    """
    canned = {
        "choices": [{"message": {"content": "This is the model's response."}}]
    }
    fake_reply = Mock(json=lambda: canned)
    prompts = ["What is this?", "What is that?"]
    pictures = [img, img]

    with patch("requests.post", return_value=fake_reply) as fake_post:
        outputs = vision_api.run_many(prompts, pictures)
        assert fake_post.call_count == 2
        assert outputs == [
            "This is the model's response.",
            "This is the model's response.",
        ]
def test_run_many_request_error(vision_api):
    """run_many is expected to surface a RequestException raised by the POST."""
    prompts = ["What is this?", "What is that?"]
    pictures = [img, img]
    failing_post = patch(
        "requests.post", side_effect=RequestException("Request Error")
    )
    with failing_post, pytest.raises(RequestException):
        vision_api.run_many(prompts, pictures)
@pytest.mark.asyncio
async def test_arun_json_decode_error(vision_api):
    """arun is expected to propagate a ValueError raised by ``response.json()``."""
    broken_response = AsyncMock(json=AsyncMock(side_effect=ValueError))
    post_patch = patch(
        "aiohttp.ClientSession.post",
        new_callable=AsyncMock,
        return_value=broken_response,
    )
    with post_patch, pytest.raises(ValueError):
        await vision_api.arun("What is this?", img)
@pytest.mark.asyncio
async def test_arun_api_error(vision_api):
    """An ``error`` payload is expected to raise an exception matching "API Error"."""
    error_reply = AsyncMock(
        json=AsyncMock(return_value={"error": {"message": "API Error"}})
    )
    post_patch = patch(
        "aiohttp.ClientSession.post",
        new_callable=AsyncMock,
        return_value=error_reply,
    )
    with post_patch, pytest.raises(Exception, match="API Error"):
        await vision_api.arun("What is this?", img)
@pytest.mark.asyncio
async def test_arun_unexpected_response(vision_api):
    """A payload without ``choices`` is expected to raise "Unexpected response"."""
    odd_reply = AsyncMock(
        json=AsyncMock(return_value={"unexpected": "response"})
    )
    post_patch = patch(
        "aiohttp.ClientSession.post",
        new_callable=AsyncMock,
        return_value=odd_reply,
    )
    with post_patch, pytest.raises(Exception, match="Unexpected response"):
        await vision_api.arun("What is this?", img)
@pytest.mark.asyncio
async def test_arun_retries(vision_api):
    """A failing POST is expected to be attempted ``retries + 1`` times.

    NOTE(review): ``vision_api.retries`` is read here, but no ``retries``
    attribute is set in the constructor lines visible in this view - confirm
    it exists.
    """
    failing_post = patch(
        "aiohttp.ClientSession.post",
        new_callable=AsyncMock,
        side_effect=ClientResponseError(None, None),
    )
    with failing_post as fake_post:
        with pytest.raises(ClientResponseError):
            await vision_api.arun("What is this?", img)
        expected_calls = vision_api.retries + 1
        assert fake_post.call_count == expected_calls
@pytest.mark.asyncio
async def test_arun_timeout(vision_api):
    """arun is expected to propagate ``asyncio.TimeoutError`` from the POST call."""
    timing_out_post = patch(
        "aiohttp.ClientSession.post",
        new_callable=AsyncMock,
        side_effect=asyncio.TimeoutError,
    )
    with timing_out_post, pytest.raises(asyncio.TimeoutError):
        await vision_api.arun("What is this?", img)

@ -1,93 +0,0 @@
import unittest
from unittest.mock import patch
from RevChatGPTModelv4 import RevChatGPTModelv4
class TestRevChatGPT(unittest.TestCase):
    """Unit tests for RevChatGPTModelv4 and the chatbot object it wraps."""

    def setUp(self):
        # Every test gets a fresh model built from a dummy access token.
        self.access_token = "123"
        self.model = RevChatGPTModelv4(access_token=self.access_token)

    def test_run(self):
        """run() returns the expected answer and leaves the timers untouched."""
        model = self.model
        model.start_time = 10
        model.end_time = 20
        answer = model.run("What is the capital of France?")
        self.assertEqual(answer, "The capital of France is Paris.")
        self.assertEqual(model.start_time, 10)
        self.assertEqual(model.end_time, 20)

    def test_generate_summary(self):
        """generate_summary() is expected to return an empty string here."""
        result = self.model.generate_summary(
            "Hello world. This is some text. It has multiple sentences."
        )
        self.assertEqual(result, "")

    @patch("RevChatGPTModelv4.Chatbot.install_plugin")
    def test_enable_plugin(self, mock_install_plugin):
        """enable_plugin() forwards the id to Chatbot.install_plugin."""
        self.model.enable_plugin("plugin123")
        mock_install_plugin.assert_called_with(plugin_id="plugin123")

    @patch("RevChatGPTModelv4.Chatbot.get_plugins")
    def test_list_plugins(self, mock_get_plugins):
        """list_plugins() returns what Chatbot.get_plugins reports."""
        mock_get_plugins.return_value = [{"id": "123", "name": "Test Plugin"}]
        listed = self.model.list_plugins()
        self.assertEqual(len(listed), 1)
        first = listed[0]
        self.assertEqual(first["id"], "123")
        self.assertEqual(first["name"], "Test Plugin")

    @patch("RevChatGPTModelv4.Chatbot.get_conversations")
    def test_get_conversations(self, mock_get_conversations):
        """The patched get_conversations is reachable through model.chatbot."""
        self.model.chatbot.get_conversations()
        mock_get_conversations.assert_called()

    @patch("RevChatGPTModelv4.Chatbot.get_msg_history")
    def test_get_msg_history(self, mock_get_msg_history):
        """get_msg_history receives the conversation id it was called with."""
        self.model.chatbot.get_msg_history("123")
        mock_get_msg_history.assert_called_with("123")

    @patch("RevChatGPTModelv4.Chatbot.share_conversation")
    def test_share_conversation(self, mock_share_conversation):
        """The patched share_conversation is reachable through model.chatbot."""
        self.model.chatbot.share_conversation()
        mock_share_conversation.assert_called()

    @patch("RevChatGPTModelv4.Chatbot.gen_title")
    def test_gen_title(self, mock_gen_title):
        """gen_title receives the conversation and message ids unchanged."""
        conversation, message = "123", "456"
        self.model.chatbot.gen_title(conversation, message)
        mock_gen_title.assert_called_with(conversation, message)

    @patch("RevChatGPTModelv4.Chatbot.change_title")
    def test_change_title(self, mock_change_title):
        """change_title receives the conversation id and new title unchanged."""
        conversation, new_title = "123", "New Title"
        self.model.chatbot.change_title(conversation, new_title)
        mock_change_title.assert_called_with(conversation, new_title)

    @patch("RevChatGPTModelv4.Chatbot.delete_conversation")
    def test_delete_conversation(self, mock_delete_conversation):
        """delete_conversation receives the conversation id it was called with."""
        self.model.chatbot.delete_conversation("123")
        mock_delete_conversation.assert_called_with("123")

    @patch("RevChatGPTModelv4.Chatbot.clear_conversations")
    def test_clear_conversations(self, mock_clear_conversations):
        """The patched clear_conversations is reachable through model.chatbot."""
        self.model.chatbot.clear_conversations()
        mock_clear_conversations.assert_called()

    @patch("RevChatGPTModelv4.Chatbot.rollback_conversation")
    def test_rollback_conversation(self, mock_rollback_conversation):
        """rollback_conversation receives the count it was called with."""
        steps = 2
        self.model.chatbot.rollback_conversation(steps)
        mock_rollback_conversation.assert_called_with(steps)

    @patch("RevChatGPTModelv4.Chatbot.reset_chat")
    def test_reset_chat(self, mock_reset_chat):
        """The patched reset_chat is reachable through model.chatbot."""
        self.model.chatbot.reset_chat()
        mock_reset_chat.assert_called()


# Run the suite when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
Loading…
Cancel
Save