gpt4vision features and api

1 year ago · 9390efb8aa
parent b9fea7b9c8
commit 9390efb8aa
7 changed files with 308 additions and 1103 deletions
--- a/README.md
+++ b/README.md
@ -117,6 +117,32 @@ workflow.run()
 for task in workflow.tasks:
    print(f"Task: {task.description}, Result: {task.result}")

+```
+
+## `Multi Modal Autonomous Agents`
+- Run a flow with a multi-modal (text + image) model, useful for real-world tasks in manufacturing, logistics, and healthcare.
+
+```python
+from swarms.structs import Flow
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
+
+# Initialize the llm
+llm = GPT4VisionAPI()
+
+task = "Analyze this image of an assembly line and identify any issues such as misaligned parts, defects, or deviations from the standard assembly process. IF there is anything unsafe in the image, explain why it is unsafe and how it could be improved."
+img = "assembly_line.jpg"
+
+## Initialize the workflow
+flow = Flow(
+    llm=llm,
+    max_loops=1,
+    dashboard=True,
+)
+
+# Run the flow
+flow.run(task=task, img=img)
+
+
 ```

 ---
--- a/multi_modal_auto_agent.py
+++ b/multi_modal_auto_agent.py
@ -11,7 +11,6 @@ img = "images/swarms.jpeg"
 flow = Flow(
    llm=llm,
    max_loops="auto",
-
 )

 flow.run(task=task, img=img)
--- a/playground/demos/assembly/assembly.py
+++ b/playground/demos/assembly/assembly.py
@ -4,7 +4,12 @@ from swarms.models.gpt4_vision_api import GPT4VisionAPI

 llm = GPT4VisionAPI()

-task = "Analyze this image of an assembly line and identify any issues such as misaligned parts, defects, or deviations from the standard assembly process. IF there is anything unsafe in the image, explain why it is unsafe and how it could be improved."
+task = (
+    "Analyze this image of an assembly line and identify any issues such as"
+    " misaligned parts, defects, or deviations from the standard assembly"
+    " process. IF there is anything unsafe in the image, explain why it is"
+    " unsafe and how it could be improved."
+)
 img = "assembly_line.jpg"

 ## Initialize the workflow
--- a/swarms/models/fast_vit_classes.json
+++ b/swarms/models/fast_vit_classes.json
--- a/swarms/models/gpt4_vision_api.py
+++ b/swarms/models/gpt4_vision_api.py
@ -1,8 +1,15 @@
+import asyncio
 import base64
+import concurrent.futures
+from termcolor import colored
+import json
 import os
+from concurrent.futures import ThreadPoolExecutor
+from typing import List, Tuple
+
+import aiohttp
 import requests
 from dotenv import load_dotenv
-import concurrent.futures

 # Load environment variables
 load_dotenv()
@ -22,7 +29,7 @@ class GPT4VisionAPI:
    max_tokens : int
        The maximum number of tokens to generate. Defaults to 300.

-    
+
    Methods
    -------
    encode_image(img: str)
@ -43,16 +50,29 @@ class GPT4VisionAPI:

    """

-    def __init__(self, openai_api_key: str = openai_api_key, max_tokens: str = 300):
+    def __init__(
+        self,
+        openai_api_key: str = openai_api_key,
+        model_name: str = "gpt-4-vision-preview",
+        max_workers: int = 10,
+        max_tokens: str = 300,
+        openai_proxy: str = "https://api.openai.com/v1/chat/completions",
+    ):
        super().__init__()
        self.openai_api_key = openai_api_key
+        self.model_name = model_name
+        self.max_workers = max_workers
        self.max_tokens = max_tokens
+        self.openai_proxy = openai_proxy

    def encode_image(self, img: str):
        """Encode image to base64."""
        with open(img, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

+    def download_img_then_encode(self, img: str):
+        """Download image from URL then encode image to base64 using requests
+
+        Parameters
+        ----------
+        img : str
+            URL of the image to download.
+
+        Returns
+        -------
+        str
+            The downloaded bytes, base64-encoded and decoded as UTF-8 text
+            (the same form ``encode_image`` produces for local files).
+
+        Raises
+        ------
+        requests.RequestException
+            If the download fails or the server answers with an HTTP error.
+        """
+        # A timeout keeps a stalled server from blocking the caller forever.
+        response = requests.get(img, timeout=30)
+        # Fail loudly on 4xx/5xx instead of base64-encoding an error page.
+        response.raise_for_status()
+        return base64.b64encode(response.content).decode("utf-8")
+
    # Function to handle vision tasks
    def run(self, task: str, img: str):
        """Run the model."""
@ -63,7 +83,7 @@ class GPT4VisionAPI:
                "Authorization": f"Bearer {openai_api_key}",
            }
            payload = {
-                "model": "gpt-4-vision-preview",
+                "model": self.model_name,
                "messages": [
                    {
                        "role": "user",
@ -125,7 +145,7 @@ class GPT4VisionAPI:
                "max_tokens": self.max_tokens,
            }
            response = requests.post(
-                "https://api.openai.com/v1/chat/completions",
+                self.openai_proxy,
                headers=headers,
                json=payload,
            )
@ -138,3 +158,135 @@ class GPT4VisionAPI:
            raise error
        # Function to handle vision tasks

+    def run_many(
+        self,
+        tasks: List[str],
+        imgs: List[str],
+    ):
+        """
+        Run the model on multiple tasks and images concurrently.
+
+        ``tasks[i]`` is paired with ``imgs[i]`` and each pair is passed to
+        ``self.run`` on a thread pool of ``self.max_workers`` workers. The
+        pairing is done by ``executor.map``, so it stops at the shorter of
+        the two lists.
+
+        Parameters
+        ----------
+        tasks : List[str]
+            The tasks to run the model on.
+        imgs : List[str]
+            The images to run the tasks on, in the same order as ``tasks``.
+
+        Returns
+        -------
+        list
+            Whatever ``self.run`` returned for each pair, in input order.
+        """
+        # ``executor.map`` hands back a one-shot iterator. Materialize it
+        # exactly once so the debug printing below cannot exhaust it and
+        # leave an empty list to return.
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            results = list(executor.map(self.run, tasks, imgs))
+
+        # Print the results for debugging
+        for result in results:
+            print(result)
+
+        return results
+
+    async def arun(
+        self,
+        task: str,
+        img: str,
+    ):
+        """
+        Asynchronously run the model on a single task and image.
+
+        Overview:
+        ---------
+        The image is base64-encoded with ``encode_image`` and sent, together
+        with the task text, as one user message in a POST request to
+        ``self.openai_proxy`` using an aiohttp session.
+
+        Parameters:
+        ----------
+        task : str
+            The task to run the model on.
+        img : str
+            The image to run the task on
+
+        Returns:
+        -------
+        The ``content`` of the first choice's message in the JSON response.
+        It is also printed.
+
+        Raises:
+        ------
+        Exception
+            Any error raised while encoding the image, sending the request
+            or reading the response is printed and then re-raised.
+
+        """
+        try:
+            base64_image = self.encode_image(img)
+            headers = {
+                "Content-Type": "application/json",
+                # Use the key this instance was configured with, so a key
+                # passed to the constructor is actually honoured.
+                "Authorization": f"Bearer {self.openai_api_key}",
+            }
+            payload = {
+                # Same configurable model name that ``run`` sends.
+                "model": self.model_name,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": task},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": (
+                                        f"data:image/jpeg;base64,{base64_image}"
+                                    )
+                                },
+                            },
+                        ],
+                    }
+                ],
+                "max_tokens": self.max_tokens,
+            }
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.openai_proxy, headers=headers, data=json.dumps(payload)
+                ) as response:
+                    out = await response.json()
+                    content = out["choices"][0]["message"]["content"]
+                    print(content)
+                    # Hand the answer back to the caller; previously it was
+                    # only printed and the coroutine returned None.
+                    return content
+        except Exception as error:
+            print(f"Error with the request {error}")
+            raise
+
+    def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
+        """Process a batch of tasks and images
+
+        Each ``(task, img)`` pair is submitted to ``self.run`` on a thread
+        pool, and the results are collected in input order.
+
+        Parameters
+        ----------
+        tasks_images : List[Tuple[str, str]]
+            The ``(task, img)`` pairs to process.
+
+        Returns
+        -------
+        list
+            The value ``self.run`` returned for each pair, in input order.
+            The first exception raised by a call (in input order) propagates.
+        """
+        # Respect the configured pool size, as ``run_many`` does, instead of
+        # falling back to the executor's default.
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=self.max_workers
+        ) as executor:
+            futures = [
+                executor.submit(self.run, task, img)
+                for task, img in tasks_images
+            ]
+            results = [future.result() for future in futures]
+        return results
+
+    async def run_batch_async(
+        self, tasks_images: List[Tuple[str, str]]
+    ) -> List[str]:
+        """Process a batch of tasks and images asynchronously
+
+        Each ``(task, img)`` pair is handed to the blocking ``self.run`` on
+        the event loop's default executor, and all calls are awaited
+        together.
+
+        Parameters
+        ----------
+        tasks_images : List[Tuple[str, str]]
+            The ``(task, img)`` pairs to process.
+
+        Returns
+        -------
+        list
+            The value ``self.run`` returned for each pair, in input order.
+        """
+        # Inside a coroutine a loop is always running; get_running_loop()
+        # is the documented way to obtain it (get_event_loop() is
+        # discouraged in coroutines).
+        loop = asyncio.get_running_loop()
+        futures = [
+            loop.run_in_executor(None, self.run, task, img)
+            for task, img in tasks_images
+        ]
+        return await asyncio.gather(*futures)
+
+    async def run_batch_async_with_retries(
+        self, tasks_images: List[Tuple[str, str]]
+    ) -> List[str]:
+        """Process a batch of tasks and images asynchronously with retries
+
+        Each ``(task, img)`` pair is handed to ``self.run_with_retries`` on
+        the event loop's default executor, and all calls are awaited
+        together.
+
+        Parameters
+        ----------
+        tasks_images : List[Tuple[str, str]]
+            The ``(task, img)`` pairs to process.
+
+        Returns
+        -------
+        list
+            The value ``self.run_with_retries`` returned for each pair, in
+            input order.
+        """
+        # NOTE(review): ``run_with_retries`` is not defined in the part of
+        # the class visible in this change - confirm it exists, otherwise
+        # this method raises AttributeError as soon as it is called with a
+        # non-empty batch.
+        # get_running_loop() is the documented way to obtain the loop from
+        # inside a coroutine.
+        loop = asyncio.get_running_loop()
+        futures = [
+            loop.run_in_executor(None, self.run_with_retries, task, img)
+            for task, img in tasks_images
+        ]
+        return await asyncio.gather(*futures)
+
+    def health_check(self):
+        """Health check for the GPT4Vision model
+
+        Sends an authenticated GET request to the OpenAI API and reports
+        whether it answered with HTTP 200.
+
+        Returns
+        -------
+        bool
+            True if the response status code is 200; False for any other
+            status or if the request itself fails (the error is printed).
+        """
+        try:
+            # NOTE(review): ``/v1/engines`` looks like the legacy listing
+            # endpoint; consider ``/v1/models`` - confirm against the
+            # current API reference.
+            response = requests.get(
+                "https://api.openai.com/v1/engines",
+                # Send the API key, since an unauthenticated request is
+                # expected to be rejected.
+                headers={"Authorization": f"Bearer {self.openai_api_key}"},
+                # Bound the wait so a health probe can never hang.
+                timeout=10,
+            )
+            return response.status_code == 200
+        except requests.RequestException as error:
+            print(f"Health check failed: {error}")
+            return False
+
+    def print_dashboard(self):
+        """Print the GPT4Vision dashboard and return the rendered text.
+
+        The dashboard lists the configured model name, worker count and
+        endpoint, colored green with ``termcolor.colored``.
+
+        Returns
+        -------
+        str
+            The colored dashboard text that was printed.
+        """
+        # Build the text first: ``print`` returns None, so binding its
+        # result (as before) made this method always return None.
+        dashboard = colored(
+            f"""
+            GPT4Vision Dashboard
+            -------------------
+            Model: {self.model_name}
+            Max Workers: {self.max_workers}
+            OpenAIProxy: {self.openai_proxy}
+            """,
+            "green",
+        )
+        print(dashboard)
+        return dashboard
--- a/tests/models/test_gpt4_vision_api.py
+++ b/tests/models/test_gpt4_vision_api.py
@ -1,9 +1,12 @@
+import asyncio
+import os
+from unittest.mock import AsyncMock, Mock, mock_open, patch
+from aiohttp import ClientResponseError
 import pytest
-from unittest.mock import mock_open, patch, Mock
+from dotenv import load_dotenv
 from requests.exceptions import RequestException
+
 from swarms.models.gpt4_vision_api import GPT4VisionAPI
-import os
-from dotenv import load_dotenv

 load_dotenv()

@ -120,3 +123,116 @@ def test_call_method_with_exception(gpt_api):
    with patch("requests.post", side_effect=Exception("Test Exception")):
        with pytest.raises(Exception):
            gpt_api(task, img_url)
+
+
+@pytest.mark.asyncio
+async def test_arun_success(vision_api):
+    expected_response = {
+        "choices": [{"message": {"content": "This is the model's response."}}]
+    }
+    with patch(
+        "aiohttp.ClientSession.post",
+        new_callable=AsyncMock,
+        return_value=AsyncMock(json=AsyncMock(return_value=expected_response)),
+    ) as mock_post:
+        result = await vision_api.arun("What is this?", img)
+        mock_post.assert_called_once()
+        assert result == "This is the model's response."
+
+
+@pytest.mark.asyncio
+async def test_arun_request_error(vision_api):
+    with patch(
+        "aiohttp.ClientSession.post",
+        new_callable=AsyncMock,
+        side_effect=Exception("Request Error"),
+    ) as mock_post:
+        with pytest.raises(Exception):
+            await vision_api.arun("What is this?", img)
+
+
+def test_run_many_success(vision_api):
+    expected_response = {
+        "choices": [{"message": {"content": "This is the model's response."}}]
+    }
+    with patch(
+        "requests.post", return_value=Mock(json=lambda: expected_response)
+    ) as mock_post:
+        tasks = ["What is this?", "What is that?"]
+        imgs = [img, img]
+        results = vision_api.run_many(tasks, imgs)
+        assert mock_post.call_count == 2
+        assert results == [
+            "This is the model's response.",
+            "This is the model's response.",
+        ]
+
+
+def test_run_many_request_error(vision_api):
+    with patch(
+        "requests.post", side_effect=RequestException("Request Error")
+    ) as mock_post:
+        tasks = ["What is this?", "What is that?"]
+        imgs = [img, img]
+        with pytest.raises(RequestException):
+            vision_api.run_many(tasks, imgs)
+
+
+@pytest.mark.asyncio
+async def test_arun_json_decode_error(vision_api):
+    with patch(
+        "aiohttp.ClientSession.post",
+        new_callable=AsyncMock,
+        return_value=AsyncMock(json=AsyncMock(side_effect=ValueError)),
+    ) as mock_post:
+        with pytest.raises(ValueError):
+            await vision_api.arun("What is this?", img)
+
+
+@pytest.mark.asyncio
+async def test_arun_api_error(vision_api):
+    error_response = {"error": {"message": "API Error"}}
+    with patch(
+        "aiohttp.ClientSession.post",
+        new_callable=AsyncMock,
+        return_value=AsyncMock(json=AsyncMock(return_value=error_response)),
+    ) as mock_post:
+        with pytest.raises(Exception, match="API Error"):
+            await vision_api.arun("What is this?", img)
+
+
+@pytest.mark.asyncio
+async def test_arun_unexpected_response(vision_api):
+    unexpected_response = {"unexpected": "response"}
+    with patch(
+        "aiohttp.ClientSession.post",
+        new_callable=AsyncMock,
+        return_value=AsyncMock(
+            json=AsyncMock(return_value=unexpected_response)
+        ),
+    ) as mock_post:
+        with pytest.raises(Exception, match="Unexpected response"):
+            await vision_api.arun("What is this?", img)
+
+
+@pytest.mark.asyncio
+async def test_arun_retries(vision_api):
+    with patch(
+        "aiohttp.ClientSession.post",
+        new_callable=AsyncMock,
+        side_effect=ClientResponseError(None, None),
+    ) as mock_post:
+        with pytest.raises(ClientResponseError):
+            await vision_api.arun("What is this?", img)
+        assert mock_post.call_count == vision_api.retries + 1
+
+
+@pytest.mark.asyncio
+async def test_arun_timeout(vision_api):
+    with patch(
+        "aiohttp.ClientSession.post",
+        new_callable=AsyncMock,
+        side_effect=asyncio.TimeoutError,
+    ) as mock_post:
+        with pytest.raises(asyncio.TimeoutError):
+            await vision_api.arun("What is this?", img)
--- a/tests/models/test_revgptv4.py
+++ b/tests/models/test_revgptv4.py
@ -1,93 +0,0 @@
-import unittest
-from unittest.mock import patch
-from RevChatGPTModelv4 import RevChatGPTModelv4
-
-
-class TestRevChatGPT(unittest.TestCase):
-    def setUp(self):
-        self.access_token = "123"
-        self.model = RevChatGPTModelv4(access_token=self.access_token)
-
-    def test_run(self):
-        prompt = "What is the capital of France?"
-        self.model.start_time = 10
-        self.model.end_time = 20
-        response = self.model.run(prompt)
-        self.assertEqual(response, "The capital of France is Paris.")
-        self.assertEqual(self.model.start_time, 10)
-        self.assertEqual(self.model.end_time, 20)
-
-    def test_generate_summary(self):
-        text = "Hello world. This is some text. It has multiple sentences."
-        summary = self.model.generate_summary(text)
-        self.assertEqual(summary, "")
-
-    @patch("RevChatGPTModelv4.Chatbot.install_plugin")
-    def test_enable_plugin(self, mock_install_plugin):
-        plugin_id = "plugin123"
-        self.model.enable_plugin(plugin_id)
-        mock_install_plugin.assert_called_with(plugin_id=plugin_id)
-
-    @patch("RevChatGPTModelv4.Chatbot.get_plugins")
-    def test_list_plugins(self, mock_get_plugins):
-        mock_get_plugins.return_value = [{"id": "123", "name": "Test Plugin"}]
-        plugins = self.model.list_plugins()
-        self.assertEqual(len(plugins), 1)
-        self.assertEqual(plugins[0]["id"], "123")
-        self.assertEqual(plugins[0]["name"], "Test Plugin")
-
-    @patch("RevChatGPTModelv4.Chatbot.get_conversations")
-    def test_get_conversations(self, mock_get_conversations):
-        self.model.chatbot.get_conversations()
-        mock_get_conversations.assert_called()
-
-    @patch("RevChatGPTModelv4.Chatbot.get_msg_history")
-    def test_get_msg_history(self, mock_get_msg_history):
-        convo_id = "123"
-        self.model.chatbot.get_msg_history(convo_id)
-        mock_get_msg_history.assert_called_with(convo_id)
-
-    @patch("RevChatGPTModelv4.Chatbot.share_conversation")
-    def test_share_conversation(self, mock_share_conversation):
-        self.model.chatbot.share_conversation()
-        mock_share_conversation.assert_called()
-
-    @patch("RevChatGPTModelv4.Chatbot.gen_title")
-    def test_gen_title(self, mock_gen_title):
-        convo_id = "123"
-        message_id = "456"
-        self.model.chatbot.gen_title(convo_id, message_id)
-        mock_gen_title.assert_called_with(convo_id, message_id)
-
-    @patch("RevChatGPTModelv4.Chatbot.change_title")
-    def test_change_title(self, mock_change_title):
-        convo_id = "123"
-        title = "New Title"
-        self.model.chatbot.change_title(convo_id, title)
-        mock_change_title.assert_called_with(convo_id, title)
-
-    @patch("RevChatGPTModelv4.Chatbot.delete_conversation")
-    def test_delete_conversation(self, mock_delete_conversation):
-        convo_id = "123"
-        self.model.chatbot.delete_conversation(convo_id)
-        mock_delete_conversation.assert_called_with(convo_id)
-
-    @patch("RevChatGPTModelv4.Chatbot.clear_conversations")
-    def test_clear_conversations(self, mock_clear_conversations):
-        self.model.chatbot.clear_conversations()
-        mock_clear_conversations.assert_called()
-
-    @patch("RevChatGPTModelv4.Chatbot.rollback_conversation")
-    def test_rollback_conversation(self, mock_rollback_conversation):
-        num = 2
-        self.model.chatbot.rollback_conversation(num)
-        mock_rollback_conversation.assert_called_with(num)
-
-    @patch("RevChatGPTModelv4.Chatbot.reset_chat")
-    def test_reset_chat(self, mock_reset_chat):
-        self.model.chatbot.reset_chat()
-        mock_reset_chat.assert_called()
-
-
-if __name__ == "__main__":
-    unittest.main()