From 9a82be6943ddfc973fdb3392dda0996e0d9554ef Mon Sep 17 00:00:00 2001
From: Kye
Date: Tue, 28 Nov 2023 14:27:09 -0800
Subject: [PATCH] [BUG][OpenAIChat model name][GPT4VisionAPI][system prompt]

---
 example.py                       |  1 +
 multi_modal_auto_agent.py        | 16 +++++++++--
 swarms/models/gpt4_vision_api.py | 47 ++++++++++++++++++++------------
 swarms/models/openai_models.py   | 18 ++++++++++--
 4 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/example.py b/example.py
index 7d6c5e03..bff19021 100644
--- a/example.py
+++ b/example.py
@@ -24,3 +24,4 @@ agent = Agent(llm=llm, max_loops=1, dashboard=True)
 
 # Run the workflow on a task
 out = agent.run("Generate a 10,000 word blog on health and wellness.")
+print(out)
\ No newline at end of file
diff --git a/multi_modal_auto_agent.py b/multi_modal_auto_agent.py
index e51f4ff5..e0fd7f06 100644
--- a/multi_modal_auto_agent.py
+++ b/multi_modal_auto_agent.py
@@ -1,11 +1,20 @@
-from swarms.structs import Agent
+import os
+
+from dotenv import load_dotenv
+
 from swarms.models.gpt4_vision_api import GPT4VisionAPI
 from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
     MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
 )
+from swarms.structs import Agent
+
+load_dotenv()
+api_key = os.environ.get("OPENAI_API_KEY")
 
-llm = GPT4VisionAPI()
+llm = GPT4VisionAPI(
+    openai_api_key=api_key,
+)
 
 task = "What is the color of the object?"
 img = "images/swarms.jpeg"
 
@@ -19,4 +28,5 @@ agent = Agent(
     dashboard=True,
 )
 
-agent.run(task=task, img=img)
+out = agent.run(task=task, img=img)
+print(out)
diff --git a/swarms/models/gpt4_vision_api.py b/swarms/models/gpt4_vision_api.py
index 7af82e59..27d53312 100644
--- a/swarms/models/gpt4_vision_api.py
+++ b/swarms/models/gpt4_vision_api.py
@@ -23,6 +23,11 @@ load_dotenv()
 
 openai_api_key = os.getenv("OPENAI_API_KEY")
 
+gpt4_vision_system_prompt = """
+You are a multi-modal autonomous agent. You are given a task and an image. You must generate a response to the task and image.
+
+"""
+
 class GPT4VisionAPI:
     """
     GPT-4 Vision API
@@ -67,8 +72,8 @@ class GPT4VisionAPI:
         openai_proxy: str = "https://api.openai.com/v1/chat/completions",
         beautify: bool = False,
         streaming_enabled: Optional[bool] = False,
-        meta_prompt: Optional[bool] = None,
-        system_prompt: Optional[str] = None,
+        meta_prompt: Optional[bool] = False,
+        system_prompt: Optional[str] = gpt4_vision_system_prompt,
         *args,
         **kwargs,
     ):
@@ -119,7 +124,7 @@ class GPT4VisionAPI:
             "Authorization": f"Bearer {openai_api_key}",
         }
         payload = {
-            "model": "gpt-4-vision-preview",
+            "model": self.model_name,
             "messages": [
                 {"role": "system", "content": [self.system_prompt]},
                 {
@@ -243,7 +248,13 @@ class GPT4VisionAPI:
         for img in base64_frames:
             base64.b64decode(img.encode("utf-8"))
 
-    def __call__(self, task: str, img: str):
+    def __call__(
+        self,
+        task: Optional[str] = None,
+        img: Optional[str] = None,
+        *args,
+        **kwargs,
+    ):
         """Run the model."""
         try:
             base64_image = self.encode_image(img)
@@ -252,7 +263,7 @@ class GPT4VisionAPI:
                 "Authorization": f"Bearer {openai_api_key}",
             }
             payload = {
-                "model": "gpt-4-vision-preview",
+                "model": self.model_name,
                 "messages": [
                     {"role": "system", "content": [self.system_prompt]},
                     {
@@ -437,16 +448,16 @@ class GPT4VisionAPI:
             )
             return dashboard
 
-    def meta_prompt_init(self):
-        """Meta Prompt
-
-        Returns:
-            _type_: _description_
-        """
-        META_PROMPT = """
-        For any labels or markings on an image that you reference in your response, please
-        enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for
-        example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
-        numbers or letters and typically correspond to specific segments or parts of the image.
-        """
-        return META_PROMPT
+    # def meta_prompt_init(self):
+    #     """Meta Prompt
+
+    #     Returns:
+    #         _type_: _description_
+    #     """
+    #     META_PROMPT = """
+    #     For any labels or markings on an image that you reference in your response, please
+    #     enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for
+    #     example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
+    #     numbers or letters and typically correspond to specific segments or parts of the image.
+    #     """
+    #     return META_PROMPT
diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py
index 2fd86122..8d74ca2e 100644
--- a/swarms/models/openai_models.py
+++ b/swarms/models/openai_models.py
@@ -751,6 +751,21 @@ class OpenAIChat(BaseLLM):
     Any parameters that are valid to be passed to the openai.create call
     can be passed in, even if not explicitly saved on this class.
 
+    Args:
+
+        model_name: The model name to use.
+        model_kwargs: Any additional kwargs to pass to the model.
+        openai_api_key: The OpenAI API key to use.
+        openai_api_base: The OpenAI API base to use.
+        openai_proxy: The OpenAI proxy to use.
+        max_retries: The maximum number of retries to make when generating.
+        prefix_messages: The prefix messages to use.
+        streaming: Whether to stream the results or not.
+        allowed_special: Set of special tokens that are allowed.
+        disallowed_special: Set of special tokens that are not allowed.
+
+
     Example:
         .. code-block:: python
             from swarms.models import OpenAIChat
             openaichat = OpenAIChat(model_name="gpt-3.5-turbo")
@@ -761,12 +776,9 @@ class OpenAIChat(BaseLLM):
 
     client: Any #: :meta private:
     model_name: str = "gpt-3.5-turbo-1106"
-    """Model name to use."""
     model_kwargs: Dict[str, Any] = Field(default_factory=dict)
-    """Holds any model parameters valid for `create` call not explicitly specified."""
    openai_api_key: Optional[str] = None
     openai_api_base: Optional[str] = None
-    # to support explicit proxy for OpenAI
     openai_proxy: Optional[str] = None
     max_retries: int = 6
     """Maximum number of retries to make when generating."""
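
Usage after this patch: the request payload now reads the model from
self.model_name instead of the hardcoded "gpt-4-vision-preview" string, and
GPT4VisionAPI ships with a default system prompt. A minimal sketch of the
intended call pattern follows; it assumes model_name is accepted as a
constructor argument (the hunks above only show it being read as
self.model_name), and the model_name value shown is illustrative:

    import os

    from dotenv import load_dotenv

    from swarms.models.gpt4_vision_api import GPT4VisionAPI

    # Read OPENAI_API_KEY from .env, as in multi_modal_auto_agent.py above.
    load_dotenv()

    # system_prompt defaults to gpt4_vision_system_prompt; the model_name
    # argument here is an assumption for illustration, not part of this patch.
    llm = GPT4VisionAPI(
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        model_name="gpt-4-vision-preview",
    )

    # __call__ now takes optional task/img keyword arguments.
    out = llm(task="What is the color of the object?", img="images/swarms.jpeg")
    print(out)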