[BUG][OpenAIChat model name][GPT4VisionAPI][system prompt]

pull/207/head^2
Kye 1 year ago
parent 4212fb4aa5
commit 9a82be6943

@ -24,3 +24,4 @@ agent = Agent(llm=llm, max_loops=1, dashboard=True)
# Run the workflow on a task # Run the workflow on a task
out = agent.run("Generate a 10,000 word blog on health and wellness.") out = agent.run("Generate a 10,000 word blog on health and wellness.")
print(out)

@ -1,11 +1,20 @@
from swarms.structs import Agent import os
from dotenv import load_dotenv
from swarms.models.gpt4_vision_api import GPT4VisionAPI from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import ( from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1, MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
) )
from swarms.structs import Agent
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
llm = GPT4VisionAPI() llm = GPT4VisionAPI(
openai_api_key=api_key,
)
task = "What is the color of the object?" task = "What is the color of the object?"
img = "images/swarms.jpeg" img = "images/swarms.jpeg"
@ -19,4 +28,5 @@ agent = Agent(
dashboard=True, dashboard=True,
) )
agent.run(task=task, img=img) out = agent.run(task=task, img=img)
print(out)

@ -23,6 +23,11 @@ load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY") openai_api_key = os.getenv("OPENAI_API_KEY")
gpt4_vision_system_prompt = """
You are an multi-modal autonomous agent. You are given a task and an image. You must generate a response to the task and image.
"""
class GPT4VisionAPI: class GPT4VisionAPI:
""" """
GPT-4 Vision API GPT-4 Vision API
@ -67,8 +72,8 @@ class GPT4VisionAPI:
openai_proxy: str = "https://api.openai.com/v1/chat/completions", openai_proxy: str = "https://api.openai.com/v1/chat/completions",
beautify: bool = False, beautify: bool = False,
streaming_enabled: Optional[bool] = False, streaming_enabled: Optional[bool] = False,
meta_prompt: Optional[bool] = None, meta_prompt: Optional[bool] = False,
system_prompt: Optional[str] = None, system_prompt: Optional[str] = gpt4_vision_system_prompt,
*args, *args,
**kwargs, **kwargs,
): ):
@ -119,7 +124,7 @@ class GPT4VisionAPI:
"Authorization": f"Bearer {openai_api_key}", "Authorization": f"Bearer {openai_api_key}",
} }
payload = { payload = {
"model": "gpt-4-vision-preview", "model": self.model_name,
"messages": [ "messages": [
{"role": "system", "content": [self.system_prompt]}, {"role": "system", "content": [self.system_prompt]},
{ {
@ -243,7 +248,13 @@ class GPT4VisionAPI:
for img in base64_frames: for img in base64_frames:
base64.b64decode(img.encode("utf-8")) base64.b64decode(img.encode("utf-8"))
def __call__(self, task: str, img: str): def __call__(
self,
task: Optional[str] = None,
img: Optional[str] = None,
*args,
**kwargs,
):
"""Run the model.""" """Run the model."""
try: try:
base64_image = self.encode_image(img) base64_image = self.encode_image(img)
@ -252,7 +263,7 @@ class GPT4VisionAPI:
"Authorization": f"Bearer {openai_api_key}", "Authorization": f"Bearer {openai_api_key}",
} }
payload = { payload = {
"model": "gpt-4-vision-preview", "model": self.model_name,
"messages": [ "messages": [
{"role": "system", "content": [self.system_prompt]}, {"role": "system", "content": [self.system_prompt]},
{ {
@ -437,16 +448,16 @@ class GPT4VisionAPI:
) )
return dashboard return dashboard
def meta_prompt_init(self): # def meta_prompt_init(self):
"""Meta Prompt # """Meta Prompt
Returns: # Returns:
_type_: _description_ # _type_: _description_
""" # """
META_PROMPT = """ # META_PROMPT = """
For any labels or markings on an image that you reference in your response, please # For any labels or markings on an image that you reference in your response, please
enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for # enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for
example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be # example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
numbers or letters and typically correspond to specific segments or parts of the image. # numbers or letters and typically correspond to specific segments or parts of the image.
""" # """
return META_PROMPT # return META_PROMPT

@ -751,6 +751,21 @@ class OpenAIChat(BaseLLM):
Any parameters that are valid to be passed to the openai.create call can be passed Any parameters that are valid to be passed to the openai.create call can be passed
in, even if not explicitly saved on this class. in, even if not explicitly saved on this class.
Args:
model_name: The model name to use.
model_kwargs: Any additional kwargs to pass to the model.
openai_api_key: The OpenAI API key to use.
openai_api_base: The OpenAI API base to use.
openai_proxy: The OpenAI proxy to use.
max_retries: The maximum number of retries to make when generating.
prefix_messages: The prefix messages to use.
streaming: Whether to stream the results or not.
allowed_special: Set of special tokens that are allowed.
disallowed_special: Set of special tokens that are not allowed.
Example: Example:
.. code-block:: python .. code-block:: python
@ -761,12 +776,9 @@ class OpenAIChat(BaseLLM):
client: Any #: :meta private: client: Any #: :meta private:
model_name: str = "gpt-3.5-turbo-1106" model_name: str = "gpt-3.5-turbo-1106"
"""Model name to use."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict) model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Holds any model parameters valid for `create` call not explicitly specified."""
openai_api_key: Optional[str] = None openai_api_key: Optional[str] = None
openai_api_base: Optional[str] = None openai_api_base: Optional[str] = None
# to support explicit proxy for OpenAI
openai_proxy: Optional[str] = None openai_proxy: Optional[str] = None
max_retries: int = 6 max_retries: int = 6
"""Maximum number of retries to make when generating.""" """Maximum number of retries to make when generating."""

Loading…
Cancel
Save